Advertisement
sergioMITM

squid log to mysql table

Mar 13th, 2018
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.11 KB | None | 0 0
  1. #!/usr/bin/python
  2. '''
  3. author: @sergiomitm
  4. description:
  5.    takes data from squid access log
  6.    puts requestor's ip, destination hostname,
  7.    number or requests to that host, and requestor's country name
  8.    into a mysql table called requests
  9. '''
  10. import collections, json, urllib, MySQLdb
  11. from collections import Counter
  12. from urlparse import urlparse
  13.  
  14. LOG_FILE = "/var/log/squid/access.log.1"
  15. DB_USER = "*********"
  16. DB_PASS = "*********"
  17. DB_NAME = "*********"
  18.  
  19. def main():
  20.     #get data from access log
  21.     print "loading log file.."
  22.     log = load_access_log(LOG_FILE)
  23.  
  24.     #connect to db
  25.     conn = MySQLdb.connect(host="localhost",
  26.             user=DB_USER,
  27.             passwd=DB_PASS,
  28.             db=DB_NAME)
  29.     cur = conn.cursor()
  30.  
  31.     # geocode the ips
  32.     print "geocoding ips..."
  33.     ips = set()
  34.     for l in log: ips.add(l[0])
  35.     old_ips = get_old_geocodes(cur)
  36.     geocodes = []
  37.     for i in ips:
  38.         try:
  39.             country = [t[1] for t in old_ips if t[0]==i][0]
  40.         except Exception as e:
  41.             country = ''
  42.         if not country: country = geocode(i)
  43.         if not country: country = "Unknown"
  44.         geocodes.append([i, country])
  45.    
  46.     #add data to the db
  47.     print "adding data to db..."
  48.     db_prep(cur)
  49.     for l in log:
  50.         l.append([g[1] for g in geocodes if l[0]==g[0]][0])
  51.         cur.execute("INSERT INTO requests VALUES (%s,%s,%s,%s)", (l[0],l[1],l[2],l[3]))
  52.     conn.commit()
  53.  
  54. def db_prep(x):
  55.     #drop and recreate table
  56.     x.execute("DROP TABLE IF EXISTS requests")
  57.     sql = "CREATE TABLE requests (ip TEXT, host TEXT, count INTEGER, country TEXT)"
  58.     x.execute(sql)
  59.  
  60. def get_old_geocodes(x):
  61.     sql = 'SELECT DISTINCT ip, country FROM requests'
  62.     x.execute(sql)
  63.     old_ips = []
  64.     for r in x.fetchall(): old_ips.append(r)
  65.     return old_ips
  66.  
  67. def geocode(ip):
  68.     u = "http://freegeoip.net/json/" + ip
  69.     try:
  70.         r = urllib.urlopen(u)
  71.         j = json.loads(r.read())
  72.         country = j['country_name']
  73.     except Exception as e:
  74.         country = "Unknown"
  75.     return country
  76.  
  77. def load_access_log(filename):
  78.     #returns list of [ip, host, request_count] from log file
  79.     requests = []
  80.     with open(filename, 'r') as f:
  81.         for line in f:
  82.             ip = line.split()[2]
  83.  
  84.             #urls are a mess, might get confused with method
  85.             #so we try to get something with greated than 8 characters
  86.             url = line.split()[6]
  87.             if len(url)<8: url = line.split()[7]
  88.             if len(url)<8: url = line.split()[8]
  89.             if len(url)<8: url = line.split()[9]
  90.             hostname = urlparse(url).netloc
  91.             if hostname: requests.append(ip+" "+hostname)
  92.  
  93.     #count and sort the list
  94.     sites =  list(collections.Counter(requests).items())
  95.     ssites = sorted(sites,key=lambda x: x[1], reverse=True)
  96.  
  97.     #format the results as a lol
  98.     requests = []
  99.     for s in ssites:
  100.         ip, host = s[0].split()
  101.         count = s[1]            
  102.         r = [ip, host, count]
  103.         requests.append(r)
  104.     return requests
  105.        
  106. if __name__ == "__main__":
  107.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement