Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # coding=utf-8
- import zipfile, re, os, fnmatch, tldextract, time , datetime, pythonwhois, sys, urllib, subprocess ,urllib2
- from urlparse import urlparse
- from dateutil import parser
- from tldextract.tldextract import LOG
- import logging
# Only surface errors from library loggers.
logging.basicConfig(level=logging.ERROR)

# --- Locations -------------------------------------------------------------
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# the script body below reads it.
dir = "/home/pi/scripts/ecm/site-urls-extract/archives/"  # where the aliases zip archives live
report_dir = "/home/pi/scripts/ecm/reports/"
targetfile = "aliases"  # name of the archive member that holds the aliases
report_file_name = os.path.join(report_dir, "aliases_whois_report.txt")

# --- Tunables --------------------------------------------------------------
exp_cap = 60    # days; domains expiring sooner than this are reported as expiring
delay_sec = 20  # seconds to pause between whois checks

# --- Result accumulators ---------------------------------------------------
hostslist = []      # every host name pulled out of the aliases files
ok_domains = []     # domains whose expiration date is far enough away
ex_domains = []     # domains that will expire in less than exp_cap days
nd_domains = []     # domains with no whois data
er_domains = []     # domains that block the automatic whois and need Ctrl-C
avoid_domains = []  # domains to leave out of the whois checks,
                    # e.g. avoid_domains.append('george.com')
class Printer():
    """Overwrite the current terminal line with the text form of a value.

    Instantiating the class is the whole operation: the constructor writes a
    carriage return, the ANSI "erase to end of line" sequence and then the
    text to stdout, and flushes so the update shows immediately.
    """

    def __init__(self, data):
        line = "\r\x1b[K" + str(data)
        stream = sys.stdout
        stream.write(line)
        stream.flush()
def wait_sec(seconds):
    """Sleep for `seconds` seconds while drawing a one-line countdown bar.

    Each second the current line is redrawn through Printer with a bar made
    of one filled cell per second already counted and one shaded cell per
    second still to go, followed by the remaining-seconds figure.  A final
    "Done waiting" line and a newline are printed when the loop ends.
    """
    for remaining in range(seconds - 1, -1, -1):
        filled = "█" * (seconds - remaining)
        pending = "▒" * remaining
        bar = "".join([filled, pending, " ", str(remaining), "s"])
        # Index 0 (the remaining count) is passed but the template only uses 1 and 2.
        Printer("Waiting for {1} seconds. {2}".format(remaining, seconds, bar))
        time.sleep(1)
    Printer("Done waiting for {0} seconds".format(seconds))
    # Terminate the dynamically rewritten line.
    print("")
def find_files(directory, pattern):
    """Yield the path of every file under `directory` whose name matches `pattern`.

    The tree is walked recursively with os.walk; `pattern` is a shell-style
    wildcard (fnmatch syntax) applied to the bare file name, not the full path.
    """
    for root, _subdirs, names in os.walk(directory):
        for matched in fnmatch.filter(names, pattern):
            yield os.path.join(root, matched)
def get_hosts(dir):
    """Collect the host names found in the aliases file of every zip archive.

    Every ``*.zip`` under `dir` is opened, and each member whose base name
    equals the module-level `targetfile` is scanned with a permissive
    URL-character regex.  A match is kept as a host when it is longer than
    five characters, contains a dot, contains no slash and does not end with
    a dot.

    Kept hosts are appended to the module-level `hostslist` (so results
    accumulate across calls, as before).

    Args:
        dir: root directory to search for zip archives.

    Returns:
        A list of the unique hosts collected so far, in no particular order.
    """
    # Raw string: same pattern as before, without relying on "\(" passing
    # through a normal string literal unchanged.
    url_chars = re.compile(
        r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

    for filename in find_files(dir, "*.zip"):
        # Context manager closes the archive handle even if reading fails;
        # previously it was never closed.
        with zipfile.ZipFile(filename, 'r') as zf:
            for member in zf.namelist():
                if member.split("/")[-1] != targetfile:
                    continue
                aliasesjson = zf.read(member)
                for url in url_chars.findall(aliasesjson):
                    looks_like_host = (
                        len(url) > 5
                        and "." in url
                        and "/" not in url
                        and not url.endswith(".")
                    )
                    if looks_like_host:
                        hostslist.append(url)
    return list(set(hostslist))  # drop duplicate hosts
def generate_reports(all_domains, ex_domains, ok_domains, nd_domains):
    """Write the plain-text expiration report to `report_file_name`.

    The report has four sections, in this order: domains expiring soon,
    every domain found in the aliases backups, successfully checked domains
    with their expiration date, and domains that could not be checked.  The
    module-level `avoid_domains` are listed at the end of the last section
    (they are not included in that section's count).

    Args:
        all_domains: iterable of domain-name strings.
        ex_domains: iterable of ``[domain, expiration_date]`` pairs.
        ok_domains: iterable of ``[domain, expiration_date]`` pairs.
        nd_domains: iterable of domain-name strings.

    Returns:
        An empty tuple (kept for compatibility with the original ``return()``).
    """
    # `with` guarantees the file is closed even when a write raises.
    with open(report_file_name, "w+") as fo:
        fo.write("Expiration Date Report For Domains Hosted at ECM DW Instances\n\n")

        fo.write("\n" + str(len(ex_domains)) + " domains that will expire soon\n\n")
        for domain in ex_domains:
            # str() on both fields, matching how ok_domains entries are written.
            fo.write(str(domain[0]) + " " + str(domain[1]) + "\n")

        fo.write("\n" + str(len(all_domains)) + " domains found in DW aliases backups\n\n")
        for domain in all_domains:
            fo.write(domain + "\n")

        fo.write("\n" + str(len(ok_domains)) + " Successfully checked domains with their expiration date \n\n")
        for domain in ok_domains:
            fo.write(str(domain[0]) + " " + str(domain[1]) + "\n")

        fo.write("\n" + str(len(nd_domains)) + " domains tha can't be checked\n\n")
        for domain in nd_domains:
            fo.write(domain + "\n")
        for domain in avoid_domains:
            fo.write(domain + "\n")

    print("Report Generated")
    return ()
def get_domains(aliases_hosts):
    """Reduce host names to their unique "<domain>.<suffix>" form.

    Each host is split with tldextract; elements 1 and 2 of the result (the
    domain and the suffix) are joined with a dot and duplicates are dropped.

    Returns:
        A list of the unique joined names, in no particular order.
    """
    unique_domains = set()
    for host in aliases_hosts:
        pieces = list(tldextract.extract(host))
        unique_domains.add(pieces[1] + "." + pieces[2])
    return list(unique_domains)
- #------------------ custom domain ext. functions ---------------------------------
def co_kr_expdate(domain):
    """Return the expiration date of `domain` using the .co.kr whois layout.

    The whois result is fetched with pythonwhois and converted to text with
    str(); the value is taken from between the 'Expiration Date' and
    'nPublishes' markers.  (Presumably the leading 'n' of the end marker is
    the tail of an escaped newline in that text form -- confirm against real
    output.)  The last two characters are dropped, spaces are removed, dots
    become slashes and the part after the first colon is parsed.

    Returns:
        The parsed datetime, or None when the whois query itself fails.

    Raises:
        IndexError: when the expected markers are missing from the whois text.
    """
    try:
        domain_data = pythonwhois.get_whois(domain)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('Expiration Date') + "(.*)" + re.escape('nPublishes')
    raw = re.findall(pattern, s)[0]
    exp_date = raw[:-2].replace(" ", "").replace(".", "/").split(":")[1]
    return parser.parse(exp_date)
def pl_expdate(domain):
    """Return the expiration date of `domain` using the .pl whois layout.

    The whois result is fetched with pythonwhois and converted to text with
    str(); the value is taken from between the 'renewal date:' and 'dnssec'
    markers, cut at the first literal backslash-n sequence, and parsed.

    Returns:
        The parsed datetime, or None when the whois query itself fails.

    Raises:
        IndexError: when the expected markers are missing from the whois text.
    """
    try:
        domain_data = pythonwhois.get_whois(domain)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('renewal date:') + "(.*)" + re.escape('dnssec')
    exp_date = re.findall(pattern, s)[0].split("\\n")[0]
    return parser.parse(exp_date)
def kr_expdate(domain):
    """Return the expiration date of `domain` using an alternative .kr whois layout.

    The whois result is fetched with pythonwhois and converted to text with
    str(); the value is taken from after the 'Expiration Date' marker up to
    the last 'n' the greedy match can reach, cut at the first backslash, and
    parsed.

    NOTE(review): no caller of this function is visible in this file; .kr
    domains are routed to co_kr_expdate.

    Returns:
        The parsed datetime, or None when the whois query itself fails.

    Raises:
        IndexError: when the expected markers are missing from the whois text.
    """
    try:
        domain_data = pythonwhois.get_whois(domain)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('Expiration Date') + "(.*)" + re.escape('n')
    exp_date = re.findall(pattern, s)[0].split("\\")[0]
    return parser.parse(exp_date)
def com_pl_expdate(domain):
    """Return the expiration date of `domain` using the .com.pl whois layout.

    The whois result is fetched with pythonwhois and converted to text with
    str(); the value is taken from after the 'renewal date:' marker up to the
    last 'n' the greedy match can reach, cut at the first literal backslash-n
    sequence, and parsed.

    Returns:
        The parsed datetime, or None when the whois query itself fails.

    Raises:
        IndexError: when the expected markers are missing from the whois text.
    """
    try:
        domain_data = pythonwhois.get_whois(domain)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('renewal date:') + "(.*)" + re.escape('n')
    exp_date = re.findall(pattern, s)[0].split("\\n")[0]
    return parser.parse(exp_date)
def org_expdate(domain):
    """Return the expiration date of `domain` from the system `whois` command.

    Runs the `whois` executable for the domain and parses the text between
    'Registry Expiry Date:' and the last 'T' on that line (the date part of
    an ISO-style timestamp).

    Returns:
        The parsed datetime, or None when the whois command cannot be run.

    Raises:
        IndexError: when the output has no 'Registry Expiry Date:' line.
    """
    try:
        # Argument list instead of a shell string: the domain comes from text
        # scraped out of archives and must not be interpreted by a shell.
        proc = subprocess.Popen(['whois', domain],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        domain_data = proc.communicate()[0]
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('Registry Expiry Date:') + "(.*)" + re.escape('T')
    exp_date = re.findall(pattern, s)[0]
    return parser.parse(exp_date)
def com_cn_expdate(domain):
    """Return the expiration date of `domain` using the .cn / .com.cn whois layout.

    The whois result is fetched with pythonwhois and converted to text with
    str(); the value is taken from between the 'Expiration Time:' and
    'DNSSEC' markers, the last two characters are dropped, and the rest is
    parsed.

    Returns:
        The parsed datetime, or None when the whois query itself fails.

    Raises:
        IndexError: when the expected markers are missing from the whois text.
    """
    try:
        domain_data = pythonwhois.get_whois(domain)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data)
    pattern = re.escape('Expiration Time:') + "(.*)" + re.escape('DNSSEC')
    exp_date = re.findall(pattern, s)[0][:-2]
    return parser.parse(exp_date)
def pt_expdate(domain):
    """Return the expiration date of `domain` from the system `whois` command (.pt layout).

    Runs the `whois` executable and reads the third line of its output.  The
    text after the first ": " on that line is split on "/", and the first
    two fields are swapped before parsing (presumably turning day/month/year
    into the month/day/year order the parser assumes -- confirm against real
    output).

    Returns:
        The parsed datetime, or None when the whois command cannot be run.

    Raises:
        IndexError: when the output does not have the expected shape.
    """
    try:
        # Argument list instead of a shell string: the domain comes from text
        # scraped out of archives and must not be interpreted by a shell.
        proc = subprocess.Popen(['whois', domain],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        domain_data = proc.communicate()[0]
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still reach the caller.
        print("can't get {} whois data".format(domain))
        return None

    s = str(domain_data).splitlines()[2].split(": ")[1].split("/")
    exp_date = s[1] + "/" + s[0] + "/" + s[2]
    return parser.parse(exp_date)
def check_exp_date(tld_domains, delay_sec):
    """Look up the expiration date of every domain and sort it into a result list.

    Each domain is routed by its suffix: some suffixes have a dedicated
    lookup function, some are known to give no usable whois data and are
    skipped, and everything else goes through pythonwhois directly.  Results
    are appended to the module-level lists:

    * ``ex_domains`` -- ``[domain, 'YYYY-MM-DD']`` expiring in fewer than
      `exp_cap` days,
    * ``ok_domains`` -- ``[domain, 'YYYY-MM-DD']`` for all other dated domains,
    * ``nd_domains`` -- domains skipped or whose lookup/parsing failed,
    * ``er_domains`` -- domains whose lookup was interrupted with Ctrl-C.

    Differences from the previous version:

    * Suffixes are matched with ``endswith`` instead of substring tests, so
      e.g. ``example.dev`` is no longer treated as ``.de``.
    * A lookup that fails, returns None or returns unparsable data marks the
      domain as "no data" instead of aborting the whole run.
    * Every lookup that was attempted is followed by the rate-limit pause
      (except after Ctrl-C); skipped domains are not.
    * The ``.com.my`` banner names ``.com.my`` rather than ``.ca``.

    NOTE(review): the original indentation of this function was lost; the
    progress line is assumed to be printed once per domain.

    Args:
        tld_domains: iterable of domain-name strings to check.
        delay_sec: seconds to pause after each attempted lookup.

    Returns:
        The tuple ``(ex_domains, ok_domains, nd_domains, er_domains)``.
    """

    def default_expdate(domain):
        """Generic lookup: first expiration date reported by pythonwhois."""
        return pythonwhois.get_whois(domain)['expiration_date'][0]

    def record(xdomain, expdate):
        """Print the days left and file the domain as expiring or ok."""
        day = str(expdate)[:10]
        diff = (expdate - datetime.datetime.today()).days
        print(" exp. date {} - {!s} days until expiration".format(day, diff))
        if diff < exp_cap:  # will expire soon
            ex_domains.append([xdomain, day])
        else:
            ok_domains.append([xdomain, day])

    # (suffix, banner, lookup) -- first match wins, so more specific
    # suffixes (.com.pl) must come before the general one (.pl).
    custom_lookups = (
        (".kr", " ============ .co.kr -------------", co_kr_expdate),
        (".org", " ============ .org -------------", org_expdate),
        (".info", " ============ .info -------------", org_expdate),
        (".cn", " ==={}== .com.cn or .cn -------------", com_cn_expdate),
        (".pt", " ============ .pt -------------", pt_expdate),
        (".com.pl", " ============ .com.pl -------------", com_pl_expdate),
        (".pl", " ============ .pl -------------", pl_expdate),
    )
    # (suffix, banner) for suffixes that are reported as "no data" untested.
    skipped = (
        (".com.br", " ========skipping==== .com.br -------------"),
        (".ch", " ========skipping==== .ch -------------"),
        (".ca", " ========skipping==== .ca http://whois.cira.ca/ -------------"),
        (".com.my", " ========skipping==== .com.my -------------"),
        (".com.au", " ========skipping==== .com.au -------------"),
        (".es", " ========skipping==== .es -------------"),
        (".de", " ========skipping==== .de -------------"),
    )

    # Floor division keeps the integer estimate regardless of Python version.
    print("--------- checking {} domains this will take aprox {} min -----------".format(
        len(tld_domains), (len(tld_domains) * (delay_sec + 2)) // 60))

    for xdomain in tld_domains:
        print(xdomain)

        banner, lookup = "---------- checking {0} -----------", default_expdate
        for suffix, custom_banner, custom_lookup in custom_lookups:
            if xdomain.endswith(suffix):
                banner, lookup = custom_banner, custom_lookup
                break
        else:
            for suffix, skip_banner in skipped:
                if xdomain.endswith(suffix):
                    banner, lookup = skip_banner, None
                    break
        print(banner.format(xdomain))

        if lookup is None:
            print(" No whois data available for " + xdomain)
            nd_domains.append(xdomain)
        else:
            try:
                expdate = lookup(xdomain)
                if expdate is None:
                    # The dedicated lookups return None when the query fails.
                    raise ValueError("no expiration date for " + xdomain)
                record(xdomain, expdate)
            except KeyboardInterrupt:
                # Operator gave up on a hanging whois query; move on at once.
                er_domains.append(xdomain)
            except Exception:
                print(" No whois data available for " + xdomain)
                nd_domains.append(xdomain)
                wait_sec(delay_sec)
            else:
                wait_sec(delay_sec)

        print(" {0} from {1} domains checked.".format(
            len(ok_domains) + len(er_domains) + len(ex_domains) + len(nd_domains),
            len(tld_domains)))

    return ex_domains, ok_domains, nd_domains, er_domains
# ---- Script body: extract hosts, reduce to domains, check, report ----------
print(" Getting hosts from aliases ...")
aliases_hosts = get_hosts(dir)
print("{0} hosts found in aliases".format(len(aliases_hosts)))

print("Separating domains from hosts ...")
tld_domains = get_domains(aliases_hosts)
print("{0} domains found in these hosts".format(len(tld_domains)))

# Copy, not alias: the report's "all domains" section must keep the domains
# that are removed from the to-check list just below.
all_domains = list(tld_domains)

print("Removing {} known bad domains".format(str(len(avoid_domains))))
for remove_domain in avoid_domains:
    # Explicit membership test instead of swallowing every exception.
    if remove_domain in tld_domains:
        tld_domains.remove(remove_domain)
        print(" {} removed from list".format(remove_domain))

# Use the configured delay rather than a second hard-coded copy of it.
ex_domains, ok_domains, nd_domains, er_domains = check_exp_date(tld_domains, delay_sec)

print("Generating reports in " + report_file_name)
generate_reports(all_domains, ex_domains, ok_domains, nd_domains)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement