Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python3
- from bs4 import BeautifulSoup
- from urllib.request import urlopen
# Page whose hyperlinks are listed here; it is also the starting point for
# the crawl calls further down the script.
url = "http://github.com/AlbertFaust?tab=repositories"

# Close the HTTP response as soon as the body has been parsed, and name the
# parser explicitly so the result does not depend on which optional parsers
# happen to be installed.
with urlopen(url) as page:
    soup = BeautifulSoup(page.read(), "html.parser")

# Print the raw href attribute of every anchor; anchors without an href
# print as None.
for link in soup.find_all('a'):
    print(link.get('href'))
def recursiveURL(url, depth, max_depth=5):
    """Follow the first hyperlink of each page, starting at *url*.

    Args:
        url: Absolute URL (a string) of the page to fetch.
        depth: Current recursion depth; each followed link adds one.
        max_depth: Depth at which the walk stops without fetching.
            Defaults to 5, the limit the script has always used.

    Returns:
        ``url`` itself when the depth limit is reached or the page has no
        anchor carrying an ``href``; otherwise the nested tuple
        ``(url, <result for the first linked page>)``.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or unsupported URL.
    """
    from urllib.parse import urljoin

    # ">=" rather than "==" so a call that starts past the limit still stops.
    if depth >= max_depth:
        return url

    with urlopen(url) as page:
        soup = BeautifulSoup(page.read(), "html.parser")

    # find() returns a Tag (or None), never a URL string. Handing the Tag
    # itself to urlopen() is what produced the TypeError in the traceback.
    anchor = soup.find('a', href=True)
    if anchor is None:
        return url

    # hrefs are frequently relative; resolve against the page they came from.
    next_url = urljoin(url, anchor['href'])
    return url, recursiveURL(next_url, depth + 1, max_depth)
def links(url):
    """Crawl outward from every hyperlink found on the page at *url*.

    Args:
        url: Absolute URL (a string) of the page whose anchors are followed.

    Returns:
        A list with one ``recursiveURL`` result per anchor that carries an
        ``href``, in document order.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or unsupported URL.
    """
    from urllib.parse import urljoin

    with urlopen(url) as page:
        soup = BeautifulSoup(page.read(), "html.parser")

    # Build a separate result list: appending to the list that is being
    # iterated would keep extending the loop forever. Pass resolved href
    # strings, not Tag objects, because recursiveURL hands its argument
    # straight to urlopen().
    return [
        recursiveURL(urljoin(url, anchor['href']), 0)
        for anchor in soup.find_all('a', href=True)
    ]
# Walk the first-link chain from the start page; the returned chain is
# not used.
recursiveURL(url, 1)

# Crawl from every link on the start page and print what was collected.
crawl_results = links(url)
print(crawl_results)
- ''' errors
- Traceback (most recent call last):
- File "URLScape.py", line 29, in <module>
- recursiveURL(url,1)
- File "URLScape.py", line 20, in recursiveURL
- return url, recursiveURL(new, depth+1)
- File "URLScape.py", line 14, in recursiveURL
- page=urlopen(url)
- File "/usr/lib/python3.4/urllib/request.py", line 161, in urlopen
- return opener.open(url, data, timeout)
- File "/usr/lib/python3.4/urllib/request.py", line 458, in open
- meth_name = protocol+"_request"
- TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement