Advertisement
Guest User

Untitled

a guest
May 24th, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.52 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import sgmllib
  4. import sys
  5.  
  6. class URLLister(sgmllib.SGMLParser):        # Simple SGML Parser for html href extraction
  7.     def reset(self):
  8.         sgmllib.SGMLParser.reset(self)
  9.         self.urls = []
  10.  
  11.     def start_a(self, attrs):
  12.         href = [v for k, v in attrs if k=='href']
  13.         if href:
  14.             self.urls.extend(href)
  15.  
  16.  
  17.  
  # Script body: print every link in the HTML file named on the command line
  # whose href contains the text "http".

  # Require the input path up front so a missing argument produces a usage
  # message on stderr (exit status 1) instead of an IndexError traceback.
  if len(sys.argv) < 2:
      sys.exit("usage: %s HTML_FILE" % sys.argv[0])

  # Read the whole document inside a context manager so the file handle is
  # closed as soon as the contents are in memory.
  with open(sys.argv[1], "r") as source:
      f = source.read()

  parser = URLLister()
  parser.feed(f)
  parser.close()

  # Substring test, not a prefix test: any href containing "http" anywhere is
  # printed, exactly as the original find() >= 0 check did.
  for url in parser.urls:
      if "http" in url:
          # Parenthesised single-argument form prints the same text under the
          # Python 2 print statement and the Python 3 print function.
          print(url)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement