Advertisement
wolfsinner

linkgetter

Jul 4th, 2011
388
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.05 KB | None | 0 0
  1. #!/usr/bin/env python
  2. #Call this script with the -u flag followed by a url and the -d flag followed by a domain (without the http://, the www. is optional)
  3. #This script returns all unique links from the given domain inside the URL's source.
  4. import urllib2, sys, re;
  5.  
  6. #FLAGS
  7. URL = "";
  8. DOMAIN = "";
  9. valid = True;
  10. args = sys.argv;
  11. #TODO: URL validation, but whatever! It's easy if you really want it.
  12. #Could've used getopt, but with just 2 parameters and a simple script, no point.
  13. try:
  14.     for i in range(1, len(args)):
  15.         if args[i] == "-u": URL = args[i+1];
  16.         if args[i] == "-d": DOMAIN = args[i+1];
  17. except IndexError:
  18.     valid = False;
  19. #EOFLAGS
  20.  
  21. #TODO: Exception handling, but whatever! It's easy if you really want it. :P
  22. if URL != "" and DOMAIN != "" and valid:
  23.     pattern = re.compile("(https?://(www.)?%s[-A-Za-z0-9\?=\._!/]*)" % (DOMAIN));
  24.     res = set(pattern.findall((urllib2.urlopen(URL)).read()));
  25.     print "Found %s links." % (len(res));
  26.     ls = "";
  27.     for i in res:
  28.         ls += "%s%s" % (i[0], "\n");
  29.     print ls.rstrip();
  30. else: print "Invalid Parameters.";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement