Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python2
- import sys
- import urllib2
- import re
# Request headers sent with every fetch; supplies a browser-like User-Agent.
headers = {'User-Agent': 'Mozilla/5.0'}


def getLinks(addr):
    """Fetch the page at *addr* and return the set of /wiki/ links in it.

    Each element is the raw regex match: a relative path such as
    '/wiki/Some_Page"' that still ends with the double quote the pattern
    matches, so callers drop the last character before using it.

    Returns None when the page cannot be fetched, either because the
    server answered with an HTTP error status or because the request
    failed at a lower level (e.g. the host could not be reached).
    """
    req = urllib2.Request(addr, None, headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError:
        # HTTPError is a subclass of URLError, so this covers HTTP error
        # statuses as well as connection-level failures.
        return None
    try:
        contents = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    # Word characters and parentheses only, terminated by a double quote.
    return set(re.findall(r'/wiki/[\w()]+"', contents))
def main(argv):
    """Print the wiki links of a start page and of every page it links to.

    argv[1] is the URL of the start page, for example
    http://en.wikipedia.org/wiki/Dexter_(TV_series)

    For each link found on the start page the absolute URL is printed,
    followed by the raw link matches found on that linked page. Linked
    pages that cannot be fetched are skipped.

    Returns a process exit status: 0 on success, 1 if the start page
    could not be fetched, 2 if no start URL was given.
    """
    if len(argv) < 2:
        sys.stderr.write("usage: <script> <start-url>\n")
        return 2

    start_links = getLinks(argv[1])
    if start_links is None:
        # getLinks signals a failed fetch with None; iterating it would
        # raise TypeError, so report the failure instead.
        sys.stderr.write("could not fetch %s\n" % argv[1])
        return 1

    for s in start_links:
        # s ends with the double quote captured by the link pattern.
        url = "http://en.wikipedia.org" + s[:-1]
        # Single-argument print(...) behaves like the print statement here.
        print(url)
        links = getLinks(url)
        if links is None:
            continue
        for s2 in links:
            # Second-level links are printed as raw matches (relative
            # path, trailing quote included), exactly as before.
            print(s2)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement