Advertisement
Guest User

Untitled

a guest
Jul 28th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | None | 0 0
  1. ###########################################################
  2. # Script to parse a typical Apache style access log file
  3. # and produce an ordered count of hits per page
  4. #
  5. # Author: Michael Fitzmaurice, July 2017
  6. ###########################################################
  7. import operator
  8. import sys
  9.  
  10. from pprint import PrettyPrinter
  11.  
  12. def get_requested_page(raw_log_line):
  13.     verb_page_and_http_version = raw_log_line.split('"')[1]
  14.     tokens = verb_page_and_http_version.split(" ")
  15.     page = "?"
  16.     if len(tokens) > 1:
  17.         page = tokens[1]
  18.     return page
  19.  
  20. if __name__ == '__main__':
  21.     log_file_path = sys.argv[1]
  22.     print("\n\tParsing log file at {}".format(log_file_path))
  23.     log_lines = open(log_file_path).readlines()
  24.     print("\tParsed {} lines".format(len(log_lines)))
  25.  
  26.     page_hit_counts = {}
  27.     line_count = 1
  28.     for line in log_lines:
  29.         page = get_requested_page(line)
  30.         if page not in page_hit_counts:
  31.             page_hit_counts[page] = 1
  32.         else:
  33.             page_hit_counts[page] += 1
  34.         line_count += 1
  35.  
  36.     pretty_printer = PrettyPrinter(indent=2)
  37.     pretty_printer.pprint(sorted(page_hit_counts.items(), key=operator.itemgetter(1), reverse=True))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement