Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import re, argparse, os
- def show_sorted(dictionary):
- for entry in sorted(dictionary, key=dictionary.get, reverse=True):
- if options.has_key('minimum') == False or dictionary[entry] > int(options['minimum']):
- print str(dictionary[entry]).rjust(total_digits), entry
- print ""
- parser = argparse.ArgumentParser(description='Parse a lighttpd access log.')
- parser.add_argument('logfiles', metavar='logfile', type=str, nargs='+',
- help='path(s) of the logfile(s)')
- parser.add_argument('-e', '--extensions', dest='extensions', action='store',
- help='specify a comma-separated list of extensions to ignore during parsing')
- parser.add_argument('-m', '--minimum', dest='minimum', action='store',
- help='the counting threshold that has to be exceeded to display the entry')
- args = parser.parse_args()
- options = vars(args)
- referers = {}
- days = {}
- hosts = {}
- files = {}
- urls = {}
- extensions = {}
- total_digits = 10
- current_lines = 0
- try:
- ignore_extensions = options['extensions'].split(',')
- except AttributeError:
- ignore_extensions = []
- for logpath in options['logfiles']:
- try:
- log = open(logpath, 'r')
- for line in log:
- try:
- ip, hostname, dash, datetime, timezone, method, uri, version, status, size, referer, useragent = line.split(' ', 11)
- datetime = datetime[1:]
- date = datetime.split(':')[0]
- timezone = timezone[:-1]
- method = method[1:]
- version = version[:-1]
- useragent = useragent[1:-2]
- referer = referer[1:-1]
- filename = uri.split('?')[0]
- extension = os.path.splitext(filename)[1][1:]
- if extension not in ignore_extensions:
- if hostname not in hosts:
- hosts[hostname] = 0
- if referer not in referers:
- referers[referer] = 0
- if date not in days:
- days[date] = 0
- if filename not in files:
- files[filename] = 0
- if uri not in urls:
- urls[uri] = 0
- if extension not in extensions:
- extensions[extension] = 0
- hosts[hostname] += 1
- referers[referer] += 1
- days[date] += 1
- files[filename] += 1
- urls[uri] += 1
- extensions[extension] += 1
- except ValueError:
- print "Corrupt log line at line %d, contents: %s" % (current_lines + 1, line[:-1])
- current_lines += 1
- if current_lines % 1000 == 0:
- print "Processed %d lines." % current_lines
- except IOError:
- print "Could not find file %s, ignored entry." % logpath
- print "Top days:"
- show_sorted(days)
- print ""
- print "Top requested hostnames:"
- show_sorted(hosts)
- print ""
- print "Top files:"
- show_sorted(files)
- print ""
- print "Top extensions:"
- show_sorted(extensions)
- print ""
- print "Top referers:"
- show_sorted(referers)
- print ""
- print "Top URLs:"
- show_sorted(urls)
- print ""
Advertisement
Add Comment
Please, Sign In to add comment