Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import requests
- import re
def check_for_redirects(url):
    """Request *url*, following redirects, and report where it ends up.

    Args:
        url: A domain or URL. When it has no ``http://`` / ``https://``
            prefix, ``http://`` is prepended before the request is made.

    Returns:
        *url* unchanged when the final host (as reduced by ``strip_url``)
        equals the host of *url*; otherwise the reduced final host. The
        string ``'expired'`` is returned when the request fails.
    """
    # Test for a real scheme prefix: a bare host such as "httpbin.org" also
    # starts with "http" but still needs a scheme to be requestable.
    if url.startswith(('http://', 'https://')):
        target = url
    else:
        target = "http://%s" % url

    try:
        r = requests.get(target, allow_redirects=True, timeout=7)
    except requests.exceptions.RequestException:
        # Timeout and ConnectionError are subclasses of RequestException;
        # catching the base class keeps one unreachable or malformed entry
        # (redirect loop, invalid URL, ...) from aborting the whole run.
        return 'expired'

    final_host = strip_url(r.url)
    if final_host == strip_url(url):
        return url
    return final_host
def strip_url(url):
    """Reduce *url* to its host name without scheme, ``www`` prefix or path.

    Args:
        url: A URL or bare domain, e.g. ``"https://www2.example.com/a/b"``.

    Returns:
        The host part with surrounding whitespace removed, e.g.
        ``"example.com"``. Everything from the first ``/`` after the scheme
        onwards is discarded.
    """
    host = url.strip()
    # Drop a leading scheme only; anchoring keeps the rest of the string intact.
    host = re.sub(r"^https?://", "", host)
    # Drop a leading "www." / "www<digits>." label. The pattern is anchored
    # and the dot escaped so that hosts merely containing "www"
    # (e.g. "mywwwsite.com") are left alone.
    host = re.sub(r"^www\d*\.", "", host)
    # Keep only what precedes the first slash (path, query, ... removed).
    host = host.partition('/')[0]
    return host.strip()
def check_domains(urls, f):
    """Check every entry of *urls* and report the result.

    For each url the outcome of ``check_for_redirects`` is printed and
    written to *f* as one ``"<url> - <result>"`` line.

    Args:
        urls: Iterable of URL / domain strings.
        f: Writable text file object receiving one line per url.
    """
    for url in urls:
        outcome = check_for_redirects(url)
        # Format once; the same text goes to stdout and to the file.
        report = "%s - %s" % (url, outcome)
        print(report)
        f.write(report + '\n')
if __name__ == '__main__':
    # The input file is the first command-line argument, falling back to
    # 'domain_list.txt'. Results are written to the file "out1".
    fname = sys.argv[1] if len(sys.argv) > 1 else 'domain_list.txt'

    # Open the input first so a missing input file does not truncate "out1";
    # the with-block closes both files even if a check raises.
    with open(fname) as source, open("out1", "w") as out1:
        # Lazily strip each line and skip blank ones, which are not URLs.
        urls = (line.strip() for line in source if line.strip())
        check_domains(urls, out1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement