Advertisement
AlbertFaust

url.py

Jun 2nd, 2015
264
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.52 KB | None | 0 0
  1. #! /usr/bin/env python3
  2. from bs4 import BeautifulSoup
  3. from urllib.request import urlopen                                                                                                                              
  4. url="http://github.com/AlbertFaust?tab=repositories"
  5.  
  6. def recursiveURL(url,depth):
  7.     print(url)
  8.     if depth == 5 or url is None:
  9.         return url
  10.     else:
  11.         page=urlopen(url)
  12.         soup=BeautifulSoup(page.read())
  13.         new = soup.find('a')
  14.         if len(new) == 0:
  15.             return url
  16.         else:
  17.             return url, recursiveURL(new, depth+1)
  18. def links(url):
  19.     page=urlopen(url)
  20.     soup=BeautifulSoup(page.read())
  21.     link=soup.find_all('a')
  22.     for i in link:
  23.         link.append(recursiveURL(i,0))
  24.         return link
  25.  
  26. recursiveURL(url,1)
  27. print(links(url))
  28.  
  29. ''' errors
  30. http://github.com/AlbertFaust?tab=repositories
  31. <a class="accessibility-aid js-skip-to-content" href="#start-of-content" tabindex="1">Skip to content</a>
  32. Traceback (most recent call last):
  33.  File "URLScape.py", line 26, in <module>
  34.    recursiveURL(url,1)
  35.  File "URLScape.py", line 17, in recursiveURL
  36.    return url, recursiveURL(new, depth+1)
  37.  File "URLScape.py", line 11, in recursiveURL
  38.    page=urlopen(url)
  39.  File "/usr/lib/python3.4/urllib/request.py", line 161, in urlopen
  40.    return opener.open(url, data, timeout)
  41.  File "/usr/lib/python3.4/urllib/request.py", line 458, in open
  42.    meth_name = protocol+"_request"
  43. TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
  44. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement