Advertisement
sergioMITM

squid log image/css/js ratios

Apr 2nd, 2018
180
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.75 KB | None | 0 0
  1. #!/bin/python
  2.  
  3. '''
  4. parses squid access log to find
  5. ratio of images, css, js for each ip.
  6. can be used for detection of bots.
  7. '''
  8.  
  9. import pandas as pd
  10. import numpy as np
  11. from collections import namedtuple
  12.  
  13. def main():
  14.     data = []
  15.     labels = ['ip','method','js','css','img']
  16.     methods = ['CONNECT', 'GET', 'HEAD', 'NONE', 'OPTIONS', 'POST', 'PUT', 'TRACE']
  17.     squid_logfile = namedtuple('logfile','time elapsed remotehost code bytes method url rfc peer type')
  18.  
  19.     with open("/var/log/squid/access.log.1", 'r') as log:
  20.         for l in log:
  21.             try:
  22.                 lf = squid_logfile._make(l.split())
  23.             except TypeError:
  24.                 #line is screwed up for some reason, ususally a funky url
  25.                 continue
  26.             if lf.method not in methods:
  27.                 #another indicator of  line import problem
  28.                 continue
  29.  
  30.             #parse the line fo the things we are interested in
  31.             js,css,img=[0,0,0]
  32.             if 'javascript' in lf.type:
  33.                 js = 1
  34.             elif 'css' in lf.type:
  35.                 css = 1
  36.             elif 'image' in lf.type:
  37.                 img = 1
  38.             data.append([lf.remotehost,lf.method,js,css,img])
  39.     df = pd.DataFrame.from_records(data, columns=labels)
  40.     data = [] #this line is responsible for 60seconds faster execution
  41.  
  42.     method_table =  df.pivot_table(index='method',values=['js','css','img'], aggfunc=[np.mean, len])
  43.     print method_table
  44.  
  45.     #we're not able to interpret encrypted data
  46.     get_df = df.loc[df['method']!='CONNECT']
  47.     ip_table =  get_df.pivot_table(index='ip',values=['js','css','img'], aggfunc=[np.mean, len])
  48.     ip_table.to_csv('table.csv', header='ip')
  49.  
  50. if __name__ == "__main__":
  51.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement