Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # coding=utf-8
- """
- This assignment required us to convert a raw access log
- into a dictionary list.
- To do this, I separated the log into individual lines
- and used a regexp pattern to extract the IP address, timestamp,
- and GET request from each line.
- I then paired each of these elements with a descriptive dictionary keyword.
- Alex Mote
- """
- import re
def logtolist(file):
    """
    Parse an access log file into a list of dictionaries.

    Every line of the file is searched for the pattern
    ``<ip> - - [<timestamp>] "<request>" <rest>``. Lines that do not
    contain it (including blank lines, such as the empty entry a trailing
    newline produces) are skipped, so the function works for logs of any
    length.

    Parameters
    ----------
    file : str or path-like
        Path of the access log file you wish to evaluate; it is passed
        straight to ``open``.

    Returns
    -------
    log_dict_list : list of dict
        One dictionary per matching line, in file order, with the keys;
        ip_address = The IP address requesting access.
        timestamp = The time at which access was requested.
        request = The command request itself, e.g. "GET /robots.txt HTTP/1.0"
    """
    # Raw string so the regex escapes reach ``re`` untouched; compiled once
    # because it is applied to every line.
    pattern = re.compile(r'([\d\.]+) - - \[(.*?)\] "(.*?)" .*')

    log_dict_list = []
    # The context manager closes the file even if parsing raises.
    with open(file, "r") as log:
        for line in log:
            found = pattern.search(line)
            if found is None:
                # Blank or malformed line: nothing to extract.
                continue
            ip_address, timestamp, request = found.groups()
            log_dict_list.append(
                {
                    "ip_address": ip_address,
                    "timestamp": timestamp,
                    "request": request,
                }
            )
    return log_dict_list
if __name__ == "__main__":
    # Script entry point: parse the sample log sitting next to the script
    # and dump the resulting list of dictionaries to stdout.
    parsed_log = logtolist("mini-access-log.txt")
    print(parsed_log)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement