Advertisement
AlbertFaust

urlscrape.py

Jun 2nd, 2015
256
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.65 KB | None | 0 0
  1. #! /usr/bin/env python3                                                                                                                                        
  2. from bs4 import BeautifulSoup
  3. from urllib.request import urlopen
  4. url="http://github.com/AlbertFaust?tab=repositories"
  5. page=urlopen(url)
  6. soup = BeautifulSoup(page.read())
  7. for link in soup.find_all('a'):
  8.             print(link.get( 'href'))
  9.  
  10. def recursiveURL(url,depth):
  11.         if depth == 5:
  12.                 return url
  13.         else:
  14.             page=urlopen(url)
  15.             soup=BeautifulSoup(page.read())
  16.             new = soup.find('a')
  17.             if len(new) == 0:
  18.                     return url
  19.             else:
  20.                     return url, recursiveURL(new, depth+1)
  21. def links(url):
  22.         page=urlopen(url)
  23.         soup=BeautifulSoup(page.read())
  24.         link=soup.find_all('a')
  25.         for i in link:
  26.                 link.append(recursiveURL(i,0))
  27.                 return link
  28.  
  29. recursiveURL(url,1)
  30. print(links(url))                                                                                                      
  31.  
  32.  
  33. ''' errors
  34. Traceback (most recent call last):
  35.  File "URLScape.py", line 29, in <module>
  36.    recursiveURL(url,1)
  37.  File "URLScape.py", line 20, in recursiveURL
  38.    return url, recursiveURL(new, depth+1)
  39.  File "URLScape.py", line 14, in recursiveURL
  40.    page=urlopen(url)
  41.  File "/usr/lib/python3.4/urllib/request.py", line 161, in urlopen
  42.    return opener.open(url, data, timeout)
  43.  File "/usr/lib/python3.4/urllib/request.py", line 458, in open
  44.    meth_name = protocol+"_request"
  45. TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'
  46. '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement