Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python2.7
def parse_row(s):
    """Split one space-separated log line into a list of field strings.

    Fields are separated by single spaces.  A field that starts with ``"``
    runs up to the next token ending in an unescaped ``"``; a field that
    starts with ``[`` runs up to the next token ending in an unescaped
    ``]``.  The surrounding delimiters are stripped and a backslash-escaped
    closing delimiter inside the field is unescaped.  Consecutive spaces
    outside a quoted field produce empty-string fields.  All CR and LF
    characters are removed before splitting.

    Args:
        s: The raw line text.

    Returns:
        A list of field strings in the order they appear on the line.  If
        the line ends inside a quoted/bracketed field, the remaining text
        (without its opening delimiter) is returned as the last field
        instead of being discarded.
    """
    row = []
    pending = None  # tokens of the delimited field being collected, if any
    closer = None   # character that terminates the field being collected

    for token in s.replace('\r', '').replace('\n', '').split(' '):
        if pending is None:
            if token[:1] == '"':
                pending, closer = [token], '"'
            elif token[:1] == '[':
                pending, closer = [token], ']'
            else:
                # Plain field; also covers '' produced by doubled spaces.
                row.append(token)
                continue
            if len(token) == 1:
                # A lone opening delimiter cannot also be the closer:
                # the field's content starts with a space (e.g. `" foo"`).
                continue
        else:
            pending.append(token)

        # The field ends on a token whose last character is the closer,
        # unless that closer is escaped with a preceding backslash.
        if token.endswith(closer) and (len(token) == 1 or token[-2] != '\\'):
            text = ' '.join(pending)[1:-1]
            row.append(text.replace('\\' + closer, closer))
            pending = closer = None

    if pending is not None:
        # Unterminated field: keep what was read rather than dropping it.
        text = ' '.join(pending)[1:]
        row.append(text.replace('\\' + closer, closer))

    return row
# Report over 'web-access.log': the five addresses with the most requests,
# the address with the most 404 responses, and the busiest hour of the day.
#
# Each line is split with parse_row(); field 0 is used as the client
# address, field 2 as the timestamp and field 4 as the status code.
# NOTE(review): the hour is taken as the text between the first and second
# ':' of field 2 -- presumably a 'dd/Mon/yyyy:HH:MM:SS ...' timestamp;
# confirm against the actual log format.
ip = {}            # address -> total number of requests
ip_404 = {}        # address -> number of requests answered with 404
hours = [0] * 24   # requests per hour of day, indexed 0..23

with open('web-access.log') as f:
    for line in f:
        req = parse_row(line)
        if len(req) < 5:
            # Blank or truncated line: the fields read below are missing.
            continue
        addr = req[0]
        status = req[4]
        hour = int(req[2].split(":")[1])
        ip[addr] = ip.get(addr, 0) + 1
        if status == '404':
            ip_404[addr] = ip_404.get(addr, 0) + 1
        # NOTE(review): the pasted source lost its indentation; this assumes
        # the hourly tally counts every request, not only 404 responses.
        hours[hour] += 1

# From here on `ip` and `ip_404` are lists of addresses, busiest first.
ip = sorted(ip, key=ip.get, reverse=True)
ip_404 = sorted(ip_404, key=ip_404.get, reverse=True)

# max() returns the first maximal index, so ties go to the earliest hour.
max_hour = max(range(24), key=hours.__getitem__)
max_req = hours[max_hour]

# Single-argument print(...) produces the same output on Python 2.7 and 3.
print("1) TOP-5 IP-addresses:")
for addr in ip[:5]:  # slicing tolerates logs with fewer than five addresses
    print(addr)
print("2) Most frequent 404 IP-address:")
if ip_404:
    print(ip_404[0])
else:
    print("(no 404 responses)")
print("3) The most active hour:")
print(max_hour)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement