Advertisement
sergioMITM

parse top sites from squid access log

Feb 5th, 2018
160
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.61 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import re
  4. import argparse
  5.  
  6. def parse_args():
  7.     parser = argparse.ArgumentParser()
  8.     parser.add_argument("--all", help="show for all ips", action="store_true")
  9.     parser.add_argument("logfile", help="squid access log file", action="store_true")
  10.     parser.add_argument("iplist", help="list of ip addresses", action="store_true")
  11.     return parser.parse_args()
  12.  
  13. def main():
  14.     args = parse_args()
  15.     if args.all:
  16.         outfile = "sites.txt"
  17.     else:
  18.         outfile = "sites_alarmers.txt"
  19.     sites =[]
  20.     log = open(args.logfile, 'r')
  21.     ips = get_alarmers(args.iplist)
  22.     for l in log:
  23.         url = re.split("\s+", l)[6]
  24.         ip = re.split("\s+", l)[2]
  25.         if not args.all:
  26.             if ip not in ips: continue
  27.         if re.match("^\/",url): continue
  28.         host = re.search("^(http[s]?):\/?\/?([^\/\s]+)",url)
  29.         if host:
  30.             site = host.group(2)
  31.         else:
  32.             site = url
  33.         if site.find('.')==-1: continue
  34.         found=False
  35.         for s in sites:
  36.             if s[0]==site:
  37.                 s[1]+=1
  38.                 found = True
  39.                 break
  40.         if not found:
  41.             sites.append([site,1])
  42.             print "new site: %s" %site
  43.     sites = sorted(sites, key=lambda x: int(x[1]))
  44.     with open(outfile,'w') as f:
  45.         for s in sites:
  46.             f.write("%d : %s\n" %(s[1],s[0]))
  47.             print s[1], s[0]
  48.  
  49. def get_alarmers(iplist):
  50.     ips = []
  51.     interest_list = open(iplist,'r')
  52.     for i in interest_list: ips.append(i.strip())
  53.     return ips
  54.  
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement