Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding=utf-8
- ##
- # @file make-antizapret.py
- # @brief Script to generate the list of route commands for OpenVPN server to circumvent Russian internet blacklist
- # It also generates some statistics and lists
- #
- import sys, codecs, ctypes, win_unicode_console, requests, pygeoip, geoip2.database, json, csv
- from netaddr import IPAddress, IPNetwork, cidr_merge, all_matching_cidrs
- win_unicode_console.enable()
# ---------------------------------------------------------------------------
# Global accumulators filled while the blacklist dump is parsed.
# ---------------------------------------------------------------------------

# Per-bureau ("who") statistics.
cnt2en = {}       # bureau -> number of blacklist entries
cnt2ip = {}       # bureau -> number of IPs first seen in that bureau's entries

# Per-court-case statistics (only for entries whose bureau is 'суд').
cnt2delo = {}     # case -> number of entries
cnt2deloIP = {}   # case -> number of distinct IPs

# bureau -> list of domain names with the common prefixes stripped.
domains = {}

# Named counters. All start at zero; the "IPs" counters are overwritten with
# set sizes after parsing, the others are incremented in place.
cnt = {
    # Special lists (hosting/CDN providers)
    'Total Cloudflare': 0,
    'Total AmazonAWS': 0,
    'Total DO': 0,
    'Total Cloudflare by IP': 0,
    'Total AmazonAWS by IP': 0,
    'Total DO by IP': 0,
    # Entries
    'Total Entries': 0,
    'IP-only Entries': 0,       # IP only
    'Domain-only Entries': 0,   # domain name, but no URL
    'HTTPS Entries': 0,         # URL, but HTTPS
    'HTTP Entries': 0,          # URL and HTTP
    'Other Entries': 0,         # some other protocol
    # Unique IPs
    'Total IPs': 0,
    'IP-only Unique IPs': 0,    # IP only
    'Domain-only IPs': 0,       # domain name, but no URL
    'HTTPS IPs': 0,             # URL, but HTTPS
    'HTTP IPs': 0,              # URL and HTTP
    'Other IPs': 0,             # some other protocol
}

# Sets of IP strings, one per entry category.
# NOTE: `all` shadows the builtin of the same name; the rest of the script
# refers to it by this name, so it is kept.
all = set()
allbyip = set()        # domain-only and IP-only
probablybyip = set()   # HTTPS, domain-only and IP-only
allbydomain = set()
allbyhttps = set()
allbyhttp = set()
allbyother = set()

# Scratch list of "<case> <ip>" keys used to count distinct IPs per case.
templist = []
n = 0

# Domain prefixes that are stripped so that variants of one site merge.
prefix = [
    '*.', 'www.', 'm.', 'mobi.', 'mobile.', 'ru.', 'ru.www.', 'www.ru.',
    'wap.', 'pda.', 'en.', 'ua.',
    'www0.', 'www1.', 'www2.', 'www3.', 'www4.',
    'www5.', 'www6.', 'www7.', 'www8.', 'www9.',
    'wwww.',
]
# ---------------------------------------------------------------------------
# Parse the blacklist dump (semicolon-separated: ip;dom;url;who;why;when) and
# fill the global counters, per-bureau domain lists and per-category IP sets.
# ---------------------------------------------------------------------------
print ('Parsing Blacklist')

# Lookup helpers local to this section: they make the "already seen?" checks
# O(1) instead of scanning the growing lists on every row.
seen_domains = {}                 # bureau -> set mirroring domains[bureau]
seen_case_ips = set(templist)     # mirrors templist ("<case> <ip>" keys)

# newline='' is what the csv module asks for when it is handed a file object.
# NOTE(review): no encoding is passed, so the locale default is used; the
# comparison with 'суд' below only works if the dump decodes correctly --
# confirm the dump's encoding and pass it explicitly.
with open ('D:/Github/z-i/dump.csv', 'r', newline='') as csvfile:
    csvr = csv.DictReader (csvfile, fieldnames=['ip','dom','url','who','why','when'], restval='', delimiter=';')
    for row in csvr:
        print ('Processed '+str(csvr.line_num)+' entries.', end='\r', flush=True)

        # Skip the "Updated: ..." header line and rows with no date field.
        if row['ip'].find('Updated')>=0 or row['when']=='':
            continue

        cnt['Total Entries']+=1
        who = row['who']
        why = row['why']
        ip_field = row['ip']
        dom = row['dom']
        url = row['url']

        # Per-bureau entry statistics.
        cnt2en[who] = cnt2en.get(who, 0) + 1

        # Per-bureau domain lists with the common prefixes stripped.
        # Prefixes are tried in table order and each match is removed, so
        # 'www.m.example.com' ends up as 'example.com'.
        domain = dom
        for pre in prefix:
            if domain.startswith(pre):
                domain = domain[len(pre):]
        bureau_domains = domains.setdefault(who, [])
        known = seen_domains.get(who)
        if known is None:
            known = seen_domains[who] = set(bureau_domains)
        if domain not in known:
            known.add(domain)
            bureau_domains.append(domain)

        # One entry may list several IPs separated by '|'.
        ips = [ip.strip() for ip in ip_field.split('|')]

        # Court ('суд') entries: count entries and distinct IPs per case.
        if who=='суд':
            cnt2delo[why] = cnt2delo.get(why, 0) + 1
            for ip2 in ips:
                k1 = why+' '+ip2
                if k1 not in seen_case_ips:
                    seen_case_ips.add(k1)
                    templist.append(k1)
                    cnt2deloIP[why] = cnt2deloIP.get(why, 0) + 1

        # Per-bureau unique IP statistics: an IP is credited to the bureau of
        # the first entry it appears in.
        for ip2 in ips:
            if ip2 not in all:
                all.add(ip2)
                cnt2ip[who] = cnt2ip.get(who, 0) + 1

        # Classify the entry and remember its IPs under that category.
        if (url=='' and dom=='') or dom==ip_field:
            cnt['IP-only Entries']+=1
            allbyip.update(ips)
        elif url=='':
            # Reaching here with an empty URL implies a non-empty domain
            # that differs from the IP field.
            cnt['Domain-only Entries']+=1
            allbydomain.update(ips)
        elif 'https://' in url and 'http://' not in url:
            cnt['HTTPS Entries']+=1
            allbyhttps.update(ips)
        elif 'http://' in url:
            cnt['HTTP Entries']+=1
            allbyhttp.update(ips)
        else:
            cnt['Other Entries']+=1
            allbyother.update(ips)
# ---------------------------------------------------------------------------
# Make the per-category IP sets disjoint. An IP that appears in several
# categories is kept only in the last one of this order:
#   HTTP < HTTPS < domain-only < IP-only < other
# Each set is reduced in place, in that order, so every step sees the
# already-reduced version of the sets before it and the untouched version of
# the sets after it.
# ---------------------------------------------------------------------------
print ('')
print ('Cleanup')
allbyhttp -= (allbyhttps | allbydomain | allbyip | allbyother)
allbyhttps -= (allbydomain | allbyip | allbyother)
allbydomain -= (allbyip | allbyother)
allbyip -= allbyother

# Everything that is not plain-HTTP.
probablybyip = allbyhttps | allbydomain | allbyip | allbyother

# Replace the "IPs" counters with the sizes of the (now disjoint) sets.
for counter_name, ip_set in (
    ('Total IPs', all),
    ('HTTP IPs', allbyhttp),
    ('HTTPS IPs', allbyhttps),
    ('Domain-only IPs', allbydomain),
    ('IP-only Unique IPs', allbyip),
    ('Other IPs', allbyother),
):
    cnt[counter_name] = len(ip_set)
# ---------------------------------------------------------------------------
# Write the OpenVPN route table: one `push "route <ip> <netmask>"` line per
# merged network, to addrlist.txt in the current directory.
# `all` contains every unique IP collected from the blacklist.
# ---------------------------------------------------------------------------
print ('Merging routes')
cidrs = cidr_merge (all) # merging adjacent IPs into ranges
n=0
total_routes = len(cidrs)
# `with` guarantees the file is closed even if formatting a route fails.
with open ('addrlist.txt', 'w') as f:
    for n, netw in enumerate(cidrs, 1):
        print ('Processed '+'{:.1%}'.format(n/total_routes), end='\r', flush=True)
        route = 'push "route ' + str(netw.ip) + ' ' + str(netw.netmask)+ '"'
        f.write (route + '\n')
# ---------------------------------------------------------------------------
# Count blocked IPs per country using the GeoLite2 country database
# (GeoLite2-Country.mmdb in the current directory).
# Result: countrylist maps country name -> number of IPs; addresses that are
# missing from the database, or have no country name, are counted under the
# string 'None'.
# ---------------------------------------------------------------------------
print ('Calculating country statistics')
countrylist = {}
gi = geoip2.database.Reader('GeoLite2-Country.mmdb')
try:
    n=0
    total_addrs = len(all)
    for n, addr in enumerate(all, 1):
        print ('Processed '+'{:.1%}'.format(n/total_addrs), end='\r', flush=True)
        try:
            rcountry = gi.country(addr).country.name
        except geoip2.errors.AddressNotFoundError:
            rcountry = None
        if rcountry is None:
            rcountry = 'None'
        countrylist[rcountry] = countrylist.get(rcountry, 0) + 1
finally:
    # Release the database file even if a lookup raises something unexpected.
    gi.close()
# ---------------------------------------------------------------------------
# Build the provider subnet lists used for the "special lists" statistics:
# cloudflaresubnetlist, digitaloceansubnetlist and amazonawssubnetlist.
# ---------------------------------------------------------------------------
print ('Calculating subnet statistics')
# Cloudflare network list. From https://www.cloudflare.com/ips-v4
cloudflaresubnetlist = [
    '103.21.244.0/22', '103.22.200.0/22', '103.31.4.0/22', '104.16.0.0/12',
    '108.162.192.0/18', '131.0.72.0/22', '141.101.64.0/18', '162.158.0.0/15',
    '172.64.0.0/13', '173.245.48.0/20', '188.114.96.0/20', '190.93.240.0/20',
    '197.234.240.0/22', '198.41.128.0/17', '199.27.128.0/21',
]
# DigitalOcean network list, merged into the minimal set of CIDRs.
# Probably not complete.
digitaloceansubnetlist = cidr_merge([
    '178.62.0.0/18', '178.62.64.0/18', '178.62.128.0/18', '178.62.192.0/18',
    '138.68.0.0/16', '45.55.0.0/16', '45.55.108.0/22', '45.55.112.0/22',
    '104.131.0.0/16', '104.236.0.0/16', '107.170.0.0/16',
    '138.197.56.0/22', '138.197.60.0/22', '138.197.192.0/20',
    '138.197.128.0/20', '138.197.144.0/20',
    '159.203.64.0/20', '159.203.80.0/20', '159.203.96.0/20',
    '159.203.112.0/20', '159.203.128.0/20', '159.203.144.0/22',
    '159.203.148.0/22', '159.203.160.0/20', '159.203.192.0/18',
    '159.203.0.0/18', '159.203.52.0/22',
    '162.243.0.0/16', '192.241.192.0/19', '198.199.96.0/20',
])
# Generate Amazon networks list from the published ranges file.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error directly instead of letting it
# surface later as a confusing JSON/KeyError failure.
data = requests.get('https://ip-ranges.amazonaws.com/ip-ranges.json', timeout=30)
data.raise_for_status()
loaded = json.loads(data.text)['prefixes']
# Keep only prefixes whose service field is exactly 'AMAZON'.
amazonaws = [entry['ip_prefix'] for entry in loaded if entry['service']=='AMAZON']
amazonawssubnetlist = cidr_merge(amazonaws)
# ---------------------------------------------------------------------------
# Count how many blocked IPs belong to Cloudflare, Amazon AWS and
# DigitalOcean. Each IP is credited to the first provider (in that order)
# whose subnet list matches it. IPs from `probablybyip` additionally bump the
# provider's "by IP" counter; the remaining IPs only bump the total.
# ---------------------------------------------------------------------------
provider_table = (
    # (total counter, "by IP" counter, subnet list)
    ('Total Cloudflare', 'Total Cloudflare by IP', cloudflaresubnetlist),
    ('Total AmazonAWS', 'Total AmazonAWS by IP', amazonawssubnetlist),
    ('Total DO', 'Total DO by IP', digitaloceansubnetlist),
)
n=0
# The progress counter runs across both passes, so the percentage is taken
# relative to the full address set.
for counts_by_ip, addresses in ((True, probablybyip), (False, all-probablybyip)):
    for addr in addresses:
        n+=1
        print ('Processed '+'{:.1%}'.format(n/(len(all))), end='\r', flush=True)
        for total_key, by_ip_key, subnets in provider_table:
            if all_matching_cidrs(addr, subnets):
                if counts_by_ip:
                    cnt[by_ip_key] += 1
                cnt[total_key] += 1
                break
# ---------------------------------------------------------------------------
# Write the reports (all into the current directory):
#   addrliststats.txt            counters, per-bureau and per-country stats
#   addrlistdomains-<bureau>.txt sorted domain list of each bureau
#   addrlistdomains-by-IP.txt    IPs from probablybyip
#   addrlistdomains-dela.csv     per-court-case entry and IP counts
# The counters and the country table are also echoed to the console.
# Every file is opened with `with`, so it is closed even if a write fails,
# and every file is written as UTF-8 so that non-ASCII domain names cannot
# fail under a narrow locale encoding.
# ---------------------------------------------------------------------------
print ('Output ')
print('')

row_format = '{:<27} {:>5}'

with open('addrliststats.txt', 'w', encoding='utf-8') as f:
    for k in sorted(cnt):
        print (row_format.format(k, cnt[k]))
        f.write(row_format.format(k, cnt[k])+'\n')
    f.write('\nEntries:\n')
    for k in sorted(cnt2en):
        f.write(row_format.format(k, cnt2en[k])+'\n')
    f.write('\nUnique IPs:\n')
    for k in sorted(cnt2ip):
        f.write(row_format.format(k, cnt2ip[k])+'\n')
    print ('')
    f.write('\n')
    for k in sorted(countrylist):
        print (row_format.format(k, countrylist[k]))
        f.write(row_format.format(k, countrylist[k])+'\n')

# One file per bureau; the bureau name is part of the file name.
for k in sorted(domains):
    domains[k].sort()
    with open('addrlistdomains-'+ k +'.txt', 'w', encoding='utf-8') as f:
        for l in domains[k]:
            f.write(l + '\n')

with open('addrlistdomains-by-IP.txt', 'w', encoding='utf-8') as f:
    for k in probablybyip:
        f.write(k + '\n')

# Header columns: case, entries, IPs.
with open('addrlistdomains-dela.csv', 'w', encoding='utf-8') as f:
    f.write('Дело, записей, IP\n')
    for k in sorted(cnt2delo):
        f.write('\"{:<20}\", {:>5}, {:>5}'.format(k, cnt2delo[k], cnt2deloIP[k])+'\n')
Add Comment
Please, Sign In to add comment