Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ###########################################################
- # Script to parse a typical Apache style access log file
- # and produce an ordered count of hits per page
- #
- # Author: Michael Fitzmaurice, July 2017
- ###########################################################
- import operator
- import sys
- from pprint import PrettyPrinter
def get_requested_page(raw_log_line):
    """Extract the requested page from one access-log line.

    The request is taken to be the first double-quoted section of the
    line, e.g. ``"GET /index.html HTTP/1.0"``, and the page is its second
    space-separated token.

    Args:
        raw_log_line: A single line of text from the access log.

    Returns:
        The requested page as a string, or ``"?"`` when the line has no
        double-quoted section or the quoted section has fewer than two
        space-separated tokens (for example ``"-"``).
    """
    quoted_sections = raw_log_line.split('"')
    # A line without any double quote (blank or truncated entry) has no
    # request section; report it as unknown instead of raising IndexError.
    if len(quoted_sections) < 2:
        return "?"

    tokens = quoted_sections[1].split(" ")
    return tokens[1] if len(tokens) > 1 else "?"
if __name__ == '__main__':
    # Script entry point: read the log file named on the command line,
    # count hits per requested page, and print the counts in descending
    # order of hits.

    # Exit with a usage hint rather than a bare IndexError when the
    # log file path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <access_log_path>".format(sys.argv[0]))

    log_file_path = sys.argv[1]
    print("\n\tParsing log file at {}".format(log_file_path))

    # The context manager closes the file handle once the lines are read.
    with open(log_file_path) as log_file:
        log_lines = log_file.readlines()
    print("\tParsed {} lines".format(len(log_lines)))

    # Tally the number of hits for each requested page.
    page_hit_counts = {}
    for line in log_lines:
        page = get_requested_page(line)
        page_hit_counts[page] = page_hit_counts.get(page, 0) + 1

    # Print (page, hits) pairs sorted by hit count, highest first.
    pretty_printer = PrettyPrinter(indent=2)
    pretty_printer.pprint(sorted(page_hit_counts.items(), key=operator.itemgetter(1), reverse=True))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement