Guest User

Untitled

a guest
Jun 1st, 2012
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.87 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. import re, argparse, os
  4.  
  5. def show_sorted(dictionary):
  6.     for entry in sorted(dictionary, key=dictionary.get, reverse=True):
  7.         if options.has_key('minimum') == False or dictionary[entry] > int(options['minimum']):
  8.             print str(dictionary[entry]).rjust(total_digits), entry
  9.     print ""
  10.  
  11. parser = argparse.ArgumentParser(description='Parse a lighttpd access log.')
  12.  
  13. parser.add_argument('logfiles', metavar='logfile', type=str, nargs='+',
  14.                    help='path(s) of the logfile(s)')
  15.  
  16. parser.add_argument('-e', '--extensions', dest='extensions', action='store',
  17.                    help='specify a comma-separated list of extensions to ignore during parsing')
  18.                    
  19. parser.add_argument('-m', '--minimum', dest='minimum', action='store',
  20.                    help='the counting threshold that has to be exceeded to display the entry')
  21.  
  22. args = parser.parse_args()
  23. options = vars(args)
  24.  
  25. referers = {}
  26. days = {}
  27. hosts = {}
  28. files = {}
  29. urls = {}
  30. extensions = {}
  31.  
  32. total_digits = 10
  33. current_lines = 0
  34.  
  35. try:
  36.     ignore_extensions = options['extensions'].split(',')
  37. except AttributeError:
  38.     ignore_extensions = []
  39.  
  40. for logpath in options['logfiles']:
  41.     try:
  42.         log = open(logpath, 'r')
  43.        
  44.         for line in log:
  45.             try:
  46.                 ip, hostname, dash, datetime, timezone, method, uri, version, status, size, referer, useragent = line.split(' ', 11)
  47.                 datetime = datetime[1:]
  48.                 date = datetime.split(':')[0]
  49.                 timezone = timezone[:-1]
  50.                 method = method[1:]
  51.                 version = version[:-1]
  52.                 useragent = useragent[1:-2]
  53.                 referer = referer[1:-1]
  54.                 filename = uri.split('?')[0]
  55.                 extension = os.path.splitext(filename)[1][1:]
  56.                
  57.                 if extension not in ignore_extensions:
  58.                     if hostname not in hosts:
  59.                         hosts[hostname] = 0
  60.                    
  61.                     if referer not in referers:
  62.                         referers[referer] = 0
  63.                    
  64.                     if date not in days:
  65.                         days[date] = 0
  66.                    
  67.                     if filename not in files:
  68.                         files[filename] = 0
  69.                    
  70.                     if uri not in urls:
  71.                         urls[uri] = 0
  72.                        
  73.                     if extension not in extensions:
  74.                         extensions[extension] = 0
  75.                        
  76.                     hosts[hostname] += 1
  77.                     referers[referer] += 1
  78.                     days[date] += 1
  79.                     files[filename] += 1
  80.                     urls[uri] += 1
  81.                     extensions[extension] += 1
  82.             except ValueError:
  83.                 print "Corrupt log line at line %d, contents: %s" % (current_lines + 1, line[:-1])
  84.                
  85.             current_lines += 1
  86.            
  87.             if current_lines % 1000 == 0:
  88.                 print "Processed %d lines." % current_lines    
  89.     except IOError:
  90.         print "Could not find file %s, ignored entry." % logpath
  91.  
  92. print "Top days:"
  93. show_sorted(days)
  94. print ""
  95.  
  96. print "Top requested hostnames:"
  97. show_sorted(hosts)
  98. print ""
  99.  
  100. print "Top files:"
  101. show_sorted(files)
  102. print ""
  103.  
  104. print "Top extensions:"
  105. show_sorted(extensions)
  106. print ""
  107.  
  108. print "Top referers:"
  109. show_sorted(referers)
  110. print ""
  111.  
  112. print "Top URLs:"
  113. show_sorted(urls)
  114. print ""
Advertisement
Add Comment
Please, Sign In to add comment