Advertisement
Masoko

alias-whois.py

Apr 8th, 2016
132
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.73 KB | None | 0 0
  1. #!/usr/bin/python
  2. # coding=utf-8
  3.  
  4. import zipfile, re, os, fnmatch, tldextract, time , datetime, pythonwhois, sys, urllib, subprocess ,urllib2
  5. from urlparse import urlparse
  6. from dateutil import parser
  7. from tldextract.tldextract import LOG
  8. import logging
  9. logging.basicConfig(level=logging.ERROR)
  10.  
  11. dir = "/home/pi/scripts/ecm/site-urls-extract/archives/" # dir for aliases zip arhives
  12. report_dir = "/home/pi/scripts/ecm/reports/"
  13. targetfile = "aliases" # this is the name of the file that contains the aliases
  14. hostslist = []
  15. report_file_name = report_dir + "aliases_whois_report.txt"
  16. exp_cap = 60 # days
  17. delay_sec = 20 #delay between whois checks in seconds
  18.  
  19. ok_domains = []
  20. ex_domains = [] # domains that will expire in less than exp_cap / 60 days
  21. nd_domains = [] # domains with no whois data
  22. er_domains = [] # domains that block the auto whois and require ctr-c
  23. avoid_domains = []
  24. #avoid_domains.append('george.com')
  25.  
  26. class Printer():
  27.     """Print things to stdout on one line dynamically"""
  28.     def __init__(self,data):
  29.         sys.stdout.write("\r\x1b[K"+data.__str__())
  30.         sys.stdout.flush()
  31.        
  32. def wait_sec(seconds):
  33.     for i in reversed( range(seconds) ):
  34.         bar = "█" * (seconds-i) + "▒"* i +" "+ str(i)+"s"
  35.         #firstpart, secondpart = bar[:len(bar)/2], bar[len(bar)/2:]
  36.         #bar = "{}{:^4}{}".format(firstpart,str(i)+"s",secondpart)
  37.         Printer("Waiting for {1} seconds. {2}".format(i,seconds,bar))
  38.         time.sleep(1)
  39.     Printer("Done waiting for {0} seconds".format(seconds))
  40.     print
  41.     return
  42.  
  43. def find_files(directory, pattern):
  44.     for root, dirs, files in os.walk(directory):
  45.         for basename in files:
  46.             if fnmatch.fnmatch(basename, pattern):
  47.                 filename = os.path.join(root, basename)
  48.                 yield filename
  49.  
  50. def get_hosts(dir):
  51.     for filename in find_files(dir, "*.zip"):
  52.         zf = zipfile.ZipFile(filename, 'r')
  53.         for i in zf.namelist():
  54.             if i.split("/")[-1] == targetfile:
  55.                 aliasesjson = zf.read(i)
  56.                 urls = re.findall('(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', aliasesjson)
  57.                 for url in urls:
  58.                     if len(url) > 5:
  59.                         if "." in url and "/" not in url and "." not in url[-1]:
  60.                             hostslist.append(url)
  61.                            
  62.     return list(set(hostslist)) #removes duplicating hosts
  63.  
  64. def generate_reports(all_domains,ex_domains,ok_domains,nd_domains):
  65.     fo = open(report_file_name, "w+")
  66.     fo.write("Expiration Date Report For Domains Hosted at ECM DW Instances\n\n")
  67.  
  68.     fo.write("\n"+str(len(ex_domains))+" domains that will expire soon\n\n")
  69.     for domain in ex_domains:#print all domains
  70.         fo.write(str(domain[0])+"  "+domain[1]+"\n")
  71.        
  72.     fo.write("\n"+str(len(all_domains))+" domains found in DW aliases backups\n\n")
  73.     for domain in all_domains:#print all domains
  74.         fo.write(domain+"\n")
  75.                
  76.     fo.write("\n"+str(len(ok_domains))+" Successfully checked domains with their expiration date \n\n")
  77.     for domain in ok_domains:#print all domains
  78.         fo.write(str(domain[0])+"  "+str(domain[1])+"\n")
  79.    
  80.     fo.write("\n"+str(len(nd_domains))+" domains tha can't be checked\n\n")
  81.     for domain in nd_domains:#print all domains
  82.         fo.write(domain+"\n")
  83.        
  84.     for domain in avoid_domains:#print all domains
  85.         fo.write(domain+"\n")
  86.        
  87.     fo.close()
  88.     print "Report Generated"
  89.     return()
  90.  
  91. def get_domains(aliases_hosts):
  92.     tld_domains = []
  93.  
  94.     for host in aliases_hosts:
  95.         parsed_uri = list(tldextract.extract(host))
  96.         domain = parsed_uri[1]+"."+parsed_uri[2]
  97.         #print domain + " ----------- " + host
  98.         tld_domains.append(parsed_uri[1]+"."+parsed_uri[2])
  99.  
  100.     tld_domains= list(set(tld_domains))
  101.     return (tld_domains)
  102.  
  103. #------------------ custom domain ext. functions ---------------------------------
  104. def co_kr_expdate(domain):
  105.     try:
  106.         domain_data = pythonwhois.get_whois(domain)
  107.     except:
  108.         print "can't get {} whois data".format(domain)
  109.     else:
  110.         s = str(domain_data)
  111.         exp_date = re.findall(re.escape('Expiration Date')+"(.*)"+re.escape('nPublishes'),s)[0][:-2].replace(" ", "").replace(".", "/").split(":")[1]
  112.         dt = parser.parse(exp_date)
  113.         return dt
  114.  
  115. def pl_expdate(domain):
  116.     try:
  117.         domain_data = pythonwhois.get_whois(domain)
  118.     except:
  119.         print "can't get {} whois data".format(domain)
  120.     else:
  121.         s = str(domain_data)
  122.         exp_date = re.findall(re.escape('renewal date:')+"(.*)"+re.escape('dnssec'),s)[0].split("\\n")[0]
  123.         dt = parser.parse(exp_date)
  124.         return dt
  125. def kr_expdate(domain):
  126.     try:
  127.         domain_data = pythonwhois.get_whois(domain)
  128.     except:
  129.         print "can't get {} whois data".format(domain)
  130.     else:
  131.         s = str(domain_data)
  132.         exp_date = re.findall(re.escape('Expiration Date')+"(.*)"+re.escape('n'),s)[0].split("\\")[0]
  133.         dt = parser.parse(exp_date)
  134.         return dt
  135.        
  136. def com_pl_expdate(domain):
  137.     try:
  138.         domain_data = pythonwhois.get_whois(domain)
  139.     except:
  140.         print "can't get {} whois data".format(domain)
  141.     else:
  142.         s = str(domain_data)
  143.         exp_date = re.findall(re.escape('renewal date:')+"(.*)"+re.escape('n'),s)[0].split("\\n")[0]
  144.         dt = parser.parse(exp_date)
  145.         return dt
  146.        
  147. def org_expdate(domain):
  148.     try:
  149.         domain_data = os.popen('whois '+domain).read()
  150.     except:
  151.         print "can't get {} whois data".format(domain)
  152.     else:
  153.         s = str(domain_data)
  154.         exp_date = re.findall(re.escape('Registry Expiry Date:')+"(.*)"+re.escape('T'),s)[0]
  155.         dt = parser.parse(exp_date)
  156.         return dt
  157.        
  158. def com_cn_expdate(domain):
  159.     try:
  160.         domain_data = pythonwhois.get_whois(domain)
  161.     except:
  162.         print "can't get {} whois data".format(domain)
  163.     else:
  164.         s = str(domain_data)
  165.         exp_date = re.findall(re.escape('Expiration Time:')+"(.*)"+re.escape('DNSSEC'),s)[0][:-2]
  166.         dt = parser.parse(exp_date)
  167.         return dt  
  168.  
  169. def pt_expdate(domain):
  170.     try:
  171.         domain_data = os.popen('whois '+domain).read()
  172.     except:
  173.         print "can't get {} whois data".format(domain)
  174.     else:
  175.         s = str(domain_data).splitlines()[2].split(": ")[1].split("/")
  176.         #print s
  177.         exp_date = s[1]+"/"+s[0]+"/"+s[2]
  178.         dt = parser.parse(exp_date)
  179.         return dt  
  180.        
  181.        
  182. def check_exp_date(tld_domains,delay_sec):
  183.     def days_left(xdomain,expdate):
  184.         diff = (expdate - datetime.datetime.today()).days
  185.         print" exp. date {}  - {!s} days until expiration".format(str(expdate)[:10],diff)
  186.         if diff < exp_cap: # check if domain will expire soon
  187.             ex_domains.append([xdomain,str(expdate)[:10]])
  188.         else:
  189.             ok_domains.append([xdomain,str(expdate)[:10]])
  190.         wait_sec(delay_sec)
  191.     print "--------- checking {} domains this will take aprox {} min -----------".format(len(tld_domains),(len(tld_domains)*(delay_sec+2))/60)
  192.     for xdomain in tld_domains:
  193.         print xdomain
  194.         if ".co.kr" in xdomain or ".kr" in xdomain :
  195.             print " ============ .co.kr -------------"
  196.             expdate = co_kr_expdate(xdomain)
  197.             days_left(xdomain,expdate)
  198.         elif ".org" in xdomain:
  199.             print " ============ .org -------------"
  200.             expdate = org_expdate(xdomain)
  201.             days_left(xdomain,expdate)
  202.         elif ".info" in xdomain:
  203.             print " ============ .info -------------"
  204.             expdate = org_expdate(xdomain)
  205.             days_left(xdomain,expdate)
  206.         elif ".com.cn" in xdomain or ".cn" in xdomain:
  207.             print " ==={}== .com.cn or .cn -------------".format(xdomain)
  208.             try:
  209.                 expdate = com_cn_expdate(xdomain)
  210.                 diff = (expdate - datetime.datetime.today()).days
  211.                 print" exp. date {}  - {!s} days until expiration".format(str(expdate)[:10],diff)
  212.                 if diff < exp_cap: # check if domain will expire soon
  213.                     ex_domains.append([xdomain,str(expdate)[:10]])
  214.                 else:
  215.                     ok_domains.append([xdomain,str(expdate)[:10]]) 
  216.             except:
  217.                 print " No whois data available for "+xdomain
  218.                 nd_domains.append(xdomain)
  219.         elif ".pt" in xdomain or ".com.pt" in xdomain:
  220.             print " ============ .pt -------------"
  221.             expdate = pt_expdate(xdomain)
  222.             days_left(xdomain,expdate)
  223.         elif ".pl" in xdomain and ".com.pl" not in xdomain:
  224.             print " ============ .pl -------------"
  225.             expdate = pl_expdate(xdomain)
  226.             days_left(xdomain,expdate)
  227.         elif ".com.pl" in xdomain:
  228.             print " ============ .com.pl -------------"
  229.             expdate = com_pl_expdate(xdomain)
  230.             days_left(xdomain,expdate)
  231.         elif ".com.br" in xdomain:
  232.             print " ========skipping==== .com.br -------------"
  233.             print " No whois data available for "+xdomain
  234.             nd_domains.append(xdomain)             
  235.         elif ".ch" in xdomain:
  236.             print " ========skipping==== .ch -------------"
  237.             print " No whois data available for "+xdomain
  238.             nd_domains.append(xdomain)
  239.         elif ".ca" in xdomain:
  240.             print " ========skipping==== .ca http://whois.cira.ca/ -------------"
  241.             print " No whois data available for "+xdomain
  242.             nd_domains.append(xdomain)
  243.         elif ".com.my" in xdomain:
  244.             print " ========skipping==== .ca http://whois.cira.ca/ -------------"
  245.             print " No whois data available for "+xdomain
  246.             nd_domains.append(xdomain)
  247.         elif ".com.au" in xdomain:
  248.             print " ========skipping==== .com.au -------------"
  249.             print " No whois data available for "+xdomain
  250.             nd_domains.append(xdomain)
  251.         elif ".es" in xdomain:
  252.             print " ========skipping==== .es -------------"
  253.             print " No whois data available for "+xdomain
  254.             nd_domains.append(xdomain)
  255.         elif ".de" in xdomain:
  256.             print " ========skipping==== .de -------------"
  257.             print " No whois data available for "+xdomain
  258.             nd_domains.append(xdomain)
  259.            
  260.         else:
  261.             print "---------- checking {0} -----------".format(xdomain)
  262.             try:
  263.                 xdomain_data = pythonwhois.get_whois(xdomain)
  264.                 expdate = xdomain_data['expiration_date'][0]
  265.                 diff = (expdate - datetime.datetime.today()).days
  266.                 print" exp. date {}  - {!s} days until expiration".format(str(expdate)[:10],diff)
  267.                 if diff < exp_cap: # check if domain will expire soon
  268.                     ex_domains.append([xdomain,str(expdate)[:10]]) #mark domain as expiring
  269.                 else:
  270.                     ok_domains.append([xdomain,str(expdate)[:10]]) #mark domain as good
  271.             except KeyboardInterrupt:
  272.                 er_domains.append(xdomain)
  273.             except:
  274.                 print " No whois data available for "+xdomain
  275.                 nd_domains.append(xdomain) #mark domain as no exp. data
  276.                 wait_sec(delay_sec)
  277.             else:
  278.                 wait_sec(delay_sec)
  279.         print " {0} from {1} domains checked.".format(len(ok_domains)+len(er_domains)+len(ex_domains)+len(nd_domains), len(tld_domains))
  280.  
  281.     return ex_domains,ok_domains,nd_domains,er_domains
  282.  
  283. print " Getting hosts from aliases ..."
  284. aliases_hosts = get_hosts(dir)
  285. print "{0} hosts found in aliases".format(len(aliases_hosts))
  286. print "Separating domains from hosts ..."
  287. tld_domains = get_domains(aliases_hosts)
  288. print "{0} domains found in these hosts".format(len(tld_domains))
  289.  
  290. all_domains = tld_domains
  291. print "Removing {} known bad domains".format(str(len(avoid_domains)))
  292. for remove_domain in avoid_domains:
  293.     try:
  294.         del tld_domains[tld_domains.index(remove_domain)]
  295.     except:
  296.         pass
  297.     else:
  298.         print " {} removed from list".format(remove_domain)
  299.  
  300. ex_domains,ok_domains,nd_domains, er_domains = check_exp_date(tld_domains,20)# check exp. date
  301.    
  302. print "Generating reports in " +report_file_name
  303. generate_reports(all_domains,ex_domains,ok_domains,nd_domains)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement