Advertisement
tranch

goaccess.py

May 16th, 2024
529
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.48 KB | None | 0 0
  1. import re
  2. import pandas as pd
  3.  
  4. from sys import stdin
  5.  
  6. regex = r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])"""
  7. p = re.compile(regex, re.IGNORECASE)
  8. df = pd.DataFrame([p.match(l).groupdict() for l in stdin.readlines() if p.match(l)])
  9.  
  10. print(df.head())
  11.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement