Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import sgmllib
- import sys
- class URLLister(sgmllib.SGMLParser): # Simple SGML Parser for html href extraction
- def reset(self):
- sgmllib.SGMLParser.reset(self)
- self.urls = []
- def start_a(self, attrs):
- href = [v for k, v in attrs if k=='href']
- if href:
- self.urls.extend(href)
# Script body: print every href from the HTML file named on the command
# line whose value contains the substring "http".
if len(sys.argv) < 2:
    # Fail with a readable usage message instead of a bare IndexError.
    sys.exit("usage: %s HTML_FILE" % sys.argv[0])

# Read the whole document up front; the with-block guarantees the file
# handle is closed even if reading raises.
with open(sys.argv[1], "r") as html_file:
    html_text = html_file.read()

parser = URLLister()
parser.feed(html_text)
parser.close()  # force processing of any data the parser still buffers

for url in parser.urls:
    # Substring match (not a prefix test): any href containing "http"
    # anywhere is printed, one per line.
    if "http" in url:
        print(url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement