Advertisement
Guest User

Untitled

a guest
Jul 31st, 2015
179
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.13 KB | None | 0 0
  1. import sys
  2. import re
  3. import operator
  4.  
  5. regex = re.compile(r'^(\[.*\])\ (.*)\ (\d+\.\d+\.\d+\.\d+)\ \"(.*)\"\ (\d+) \"(.*)\"\ \"(.*)\"\ \"(.*)\"\ (.*)\ -\ (.*)$')
  6.  
  7.  
  8. if len(sys.argv) < 2:
  9. print("Usage: script <logfile>")
  10. sys.exit(1)
  11.  
  12. def gettop10(arr, top):
  13. myDict = {}
  14. for line in arr:
  15. if line['referer'] not in myDict:
  16. myDict[line['referer']] = 1
  17. else:
  18. myDict[line['referer']] += 1
  19. for i in (sorted(myDict.items(), key=operator.itemgetter(1), reverse=True)[:top]):
  20. print(i)
  21.  
  22. def calcavgtime(arr):
  23. time = 0.0
  24. count = 0
  25. for line in arr:
  26. if int(line['response']) == 200:
  27. count += 1
  28. time += float(line['resp_time'])
  29.  
  30. print("Avg time: %f\nNum 200: %d" % (time/count, count))
  31.  
  32. logLines = []
  33. data = open(sys.argv[1], 'r').readlines()
  34. for line in data:
  35. res = re.match(regex, line.strip())
  36. logLines.append({"date" : res.group(1), "url" : res.group(2), "ip" : res.group(3), "request" : res.group(4), "response" : res.group(5), "referer" : res.group(6), "UA" : res.group(7), "idk" : res.group(8), "resp_time" : res.group(9), "idk_num" : res.group(10)})
  37.  
  38.  
  39.  
  40. gettop10(logLines, 10)
  41. calcavgtime(logLines)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement