Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Parse web-server access-log lines from stdin into a pandas DataFrame.

Each line that matches the access-log pattern below becomes one row whose
columns are the pattern's named groups. Lines that do not match are skipped.
"""
import re
from sys import stdin

import pandas as pd

# Named groups become the DataFrame columns. Limits of the pattern as written:
#   * only GET and POST requests are accepted;
#   * the protocol token must be "http/1.1" (matched case-insensitively);
#   * the byte count must be numeric, so a "-" in that field does not match;
#   * the "url" group keeps the space that precedes the protocol token.
regex = r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])"""
p = re.compile(regex, re.IGNORECASE)


def parse_log_lines(lines):
    """Yield one dict of named-group captures per matching line.

    Args:
        lines: Any iterable of strings (a file object, a list, ...).

    Yields:
        The ``groupdict()`` of each line that matches ``p`` at its start;
        all values are strings. Non-matching lines are silently skipped.
    """
    for line in lines:
        # Match once and reuse the result instead of matching twice per line.
        found = p.match(line)
        if found:
            yield found.groupdict()


def load_frame(lines):
    """Return a DataFrame with one row per matching log line in *lines*.

    An input with no matching lines yields an empty DataFrame.
    """
    return pd.DataFrame(list(parse_log_lines(lines)))


if __name__ == "__main__":
    # Iterate stdin lazily rather than materialising it with readlines().
    df = load_frame(stdin)
    print(df.head())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement