Advertisement
Bladtman

Untitled

Oct 13th, 2012
176
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.58 KB | None | 0 0
  1. #!/usr/bin/python2
  2. import sys
  3. import urllib2
  4. import re
  5.  
  6. headers = {'User-Agent' : 'Mozilla/5.0' }
  7.  
  8. def getLinks(addr):
  9.     req=urllib2.Request(addr, None, headers)
  10.     contents=""
  11.     try:
  12.         c=urllib2.urlopen(req)
  13.         contents=c.read()
  14.     except urllib2.HTTPError as e:
  15.         return None
  16.  
  17.     p = re.compile('/wiki/[\w()]+"')
  18.     return set(p.findall(contents))
  19.  
  20. #for s in getLinks('http://en.wikipedia.org/wiki/Dexter_(TV_series)') :
  21. for s in getLinks(sys.argv[1]) :
  22.     print "http://en.wikipedia.org" + s[:-1]
  23.     links = getLinks("http://en.wikipedia.org" + s[:-1])
  24.     if links != None :
  25.         for s2 in links :
  26.             print s2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement