Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python3
"""Print all unique links to a given domain found in a URL's page source.

Usage: script.py -u <url> -d <domain>
The domain is given without the scheme (http://); the "www." prefix is
optional in matched links.
"""
import re
import sys
import urllib.request


def parse_args(argv):
    """Return (url, domain) parsed from -u/-d flags.

    Missing or trailing flags leave the corresponding value as "" (the
    original script treated a flag with no following value as invalid).
    """
    url = ""
    domain = ""
    # Stop one short of the end: a flag needs a value after it.
    for i, arg in enumerate(argv[:-1]):
        if arg == "-u":
            url = argv[i + 1]
        elif arg == "-d":
            domain = argv[i + 1]
    return url, domain


def find_links(html, domain):
    """Return the sorted list of unique links to *domain* found in *html*.

    The domain is regex-escaped so its dots match literally; the original
    pattern's unescaped dots could over-match (e.g. "a.com" matching "axcom").
    A non-capturing group for the optional "www." keeps findall() returning
    whole-match strings instead of tuples.
    """
    pattern = re.compile(
        r"https?://(?:www\.)?%s[-A-Za-z0-9?=._!/]*" % re.escape(domain)
    )
    return sorted(set(pattern.findall(html)))


def main():
    url, domain = parse_args(sys.argv[1:])
    if not url or not domain:
        print("Invalid Parameters.")
        return
    # Close the HTTP response deterministically; decode best-effort since
    # arbitrary pages may not be valid UTF-8.
    with urllib.request.urlopen(url) as resp:
        html = resp.read().decode("utf-8", errors="replace")
    links = find_links(html, domain)
    print("Found %s links." % len(links))
    print("\n".join(links))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement