Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import re
- import operator
# Pattern for one log line. It is assembled from one fragment per capture
# group; adjacent string literals are concatenated, so the compiled pattern
# is a single expression. The field names in the comments are the dictionary
# keys the parsing loop in this script stores each group under.
regex = re.compile(
    r'^(\[.*\])\ '               # 1: bracketed prefix         -> "date"
    r'(.*)\ '                    # 2: free text                -> "url"
    r'(\d+\.\d+\.\d+\.\d+)\ '    # 3: dotted-quad address      -> "ip"
    r'\"(.*)\"\ '                # 4: first quoted field       -> "request"
    r'(\d+) '                    # 5: integer                  -> "response"
    r'\"(.*)\"\ '                # 6: second quoted field      -> "referer"
    r'\"(.*)\"\ '                # 7: third quoted field       -> "UA"
    r'\"(.*)\"\ '                # 8: fourth quoted field      -> "idk"
    r'(.*)\ -\ '                 # 9: text before " - "        -> "resp_time"
    r'(.*)$'                     # 10: remainder of the line   -> "idk_num"
)
# The log file path is a required first command-line argument.
if len(sys.argv) < 2:
    # Passing a string to sys.exit() writes it to stderr and exits with
    # status 1, so the usage error no longer lands on stdout where it could
    # be mistaken for (or piped along with) the report output.
    sys.exit("Usage: script <logfile>")
def gettop10(arr, top):
    """Print the most frequent 'referer' values found in *arr*.

    Args:
        arr: iterable of dict-like entries, each with a 'referer' key.
        top: how many (referer, count) pairs to print; applied as a slice
            bound to the list sorted by descending count.

    Each pair is printed on its own line as a tuple. Referers with equal
    counts keep the order in which they were first seen in *arr*.
    """
    # Local import keeps this block self-contained; Counter replaces the
    # hand-written "if key not in dict" tally.
    from collections import Counter

    tally = Counter(entry['referer'] for entry in arr)
    # most_common() with no argument returns every pair sorted by
    # descending count (stable for ties); slicing afterwards keeps the
    # exact slice semantics of the previous sorted(...)[:top].
    for pair in tally.most_common()[:top]:
        print(pair)
def calcavgtime(arr):
    """Print the mean 'resp_time' of entries whose 'response' is 200.

    Args:
        arr: iterable of dict-like entries with 'response' (convertible to
            int) and 'resp_time' (convertible to float) keys.

    Prints the average and the number of 200 entries. When there are no
    200 entries the average is reported as 0.0 instead of raising
    ZeroDivisionError.
    """
    ok_times = [
        float(entry['resp_time'])
        for entry in arr
        if int(entry['response']) == 200
    ]
    count = len(ok_times)
    # Guard the division: an empty log or one with no 200 responses is valid
    # input and must not crash the report.
    average = sum(ok_times) / count if count else 0.0
    print("Avg time: %f\nNum 200: %d" % (average, count))
# Parse the log file named on the command line into a list of dicts, one per
# line that matches the log pattern, then print the two reports.
logLines = []
skipped = 0
# Dictionary keys, in the same order as the capture groups of the pattern.
fields = ("date", "url", "ip", "request", "response",
          "referer", "UA", "idk", "resp_time", "idk_num")
# The with-block closes the file even if parsing raises; iterating the
# handle avoids loading the whole file into memory first.
with open(sys.argv[1], 'r') as log_file:
    for line in log_file:
        res = regex.match(line.strip())
        if res is None:
            # Lines that do not match (blank lines, truncated entries, ...)
            # previously crashed with AttributeError; count them and go on.
            skipped += 1
            continue
        logLines.append(dict(zip(fields, res.groups())))
if skipped:
    sys.stderr.write("Skipped %d unparseable line(s)\n" % skipped)
gettop10(logLines, 10)
calcavgtime(logLines)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement