Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- '''
- author: @sergiomitm
- description:
- takes data from squid access log
- puts requestor's ip, destination hostname,
- number or requests to that host, and requestor's country name
- into a mysql table called requests
- '''
- import collections, json, urllib, MySQLdb
- from collections import Counter
- from urlparse import urlparse
- LOG_FILE = "/var/log/squid/access.log.1"
- DB_USER = "*********"
- DB_PASS = "*********"
- DB_NAME = "*********"
def main():
    """Rebuild the `requests` MySQL table from the squid access log.

    Loads (ip, host, count) rows from LOG_FILE, resolves each requester
    IP to a country name (reusing countries already stored in the table
    so the geocoding service is only hit for new IPs), then drops and
    repopulates the table with (ip, host, count, country) rows.
    """
    print("loading log file..")
    log = load_access_log(LOG_FILE)

    conn = MySQLdb.connect(host="localhost",
                           user=DB_USER,
                           passwd=DB_PASS,
                           db=DB_NAME)
    try:
        cur = conn.cursor()

        print("geocoding ips...")
        # Countries we already know, read BEFORE db_prep drops the table.
        try:
            known = dict(get_old_geocodes(cur))  # ip -> country
        except MySQLdb.Error:
            # First run: the `requests` table does not exist yet.
            known = {}
        # Resolve each distinct IP exactly once; O(1) dict lookups replace
        # the per-IP/per-row list scans.
        geocodes = {}
        for ip in set(l[0] for l in log):
            geocodes[ip] = known.get(ip) or geocode(ip) or "Unknown"

        print("adding data to db...")
        db_prep(cur)
        for l in log:
            # Parameterized query: the driver escapes the values.
            cur.execute("INSERT INTO requests VALUES (%s,%s,%s,%s)",
                        (l[0], l[1], l[2], geocodes[l[0]]))
        conn.commit()
    finally:
        conn.close()
def db_prep(x):
    """Drop the `requests` table if present and recreate it empty.

    `x` is an open DB cursor; the schema is (ip, host, count, country).
    """
    x.execute("DROP TABLE IF EXISTS requests")
    x.execute(
        "CREATE TABLE requests (ip TEXT, host TEXT, count INTEGER, country TEXT)")
def get_old_geocodes(x):
    """Return every distinct (ip, country) pair stored in `requests`.

    `x` is an open DB cursor; the result is a list of row tuples.
    """
    x.execute('SELECT DISTINCT ip, country FROM requests')
    return list(x.fetchall())
def geocode(ip):
    """Look up the country name for `ip` via the freegeoip JSON API.

    Returns "Unknown" when the HTTP request or JSON decode fails for
    any reason. NOTE(review): `urllib.urlopen` is Python 2 only, and
    the freegeoip.net service has since shut down -- confirm before reuse.
    """
    url = "http://freegeoip.net/json/" + ip
    try:
        response = urllib.urlopen(url)
        payload = json.loads(response.read())
        return payload['country_name']
    except Exception:
        return "Unknown"
def load_access_log(filename):
    """Parse a squid access log into [ip, host, count] entries.

    Returns a list of [requester_ip, destination_host, request_count]
    lists, sorted by request count, highest first. Assumes squid's
    native log format: field 2 is the client IP and the request URL is
    normally field 6. Lines too short to parse are skipped (the original
    raised IndexError on them).
    """
    pair_counts = collections.Counter()
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()  # split once per line, not per probe
            if len(fields) < 7:
                continue  # blank or truncated line
            hostname = _extract_host(fields)
            if hostname:
                pair_counts[(fields[2], hostname)] += 1
    # most_common() sorts by count descending, matching the old
    # sorted(..., reverse=True) over Counter items.
    return [[ip, host, count]
            for (ip, host), count in pair_counts.most_common()]

def _extract_host(fields):
    """Return the hostname of the request URL in a split log line, or ''.

    URLs are a mess and can be confused with the HTTP method, so probe
    fields 6-9 for the first token of at least 8 characters and take
    its netloc; short or URL-less tokens yield ''.
    """
    for token in fields[6:10]:
        if len(token) >= 8:
            return urlparse(token).netloc
    return ''
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement