Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # http://regexr.com/
- import re
- import os
- import time
- import datetime
# Sample input used to exercise the patterns below. It deliberately mixes
# valid and invalid IPs/dates so that the filtering can be checked by eye.
text = r'''123.124.554.123 this is 192.168.5.20 a
195 ip adress 200.5.3.125 and not an ip 344.45.12.145,
244.244.244.0 244.244.50.1
time: 12:15:11 5:11:30 10:30:22
date: 31/12/2015 8/13/2010 9/7/2010 1/1/2050
bytes: 4444
to test sum:
192.168.5.20
200.5.3.125
5556
'''

# Expected formats (as later parsed by strptime):
#   date = %d/%m/%Y
#   time = %H:%M:%S
#
# To process a real file instead of the sample, read it into ``text``:
#   with open(file_name) as f:
#       text = f.read()

# Dotted-quad IPv4 address; every octet is restricted to 0-255.
ips = re.compile(
    r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b',
    re.MULTILINE,
)

# Three colon-separated fields, each 0-59 with an optional leading zero.
times = re.compile(r'\b(?:[0-5]?[0-9]\:){2}[0-5]?[0-9]\b', re.MULTILINE)

# day/month/year. The month accepts an optional leading zero ("07") so the
# pattern agrees with the %m directive used when the date is parsed.
dates = re.compile(r'\b[0-3]?[0-9]\/(?:1[0-2]|0?[0-9])\/\d{4}\b', re.MULTILINE)

# Every whitespace-separated token that consists only of digits, as an int.
bits = [int(token) for token in text.split() if token.isdigit()]
def make_ip_tuples(lst):
    """Group a flat sequence of IPs into consecutive pairs.

    ``lst`` is read two items at a time: items 0 and 1 form the first
    tuple, items 2 and 3 the second, and so on.

    Returns a list of 2-tuples. An empty ``lst`` yields an empty list.

    If ``lst`` has an odd number of items, a message is printed and the
    process exits with status 1 (``SystemExit``).
    """
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would turn this into an IndexError below.
    if len(lst) % 2 != 0:
        print('lst length not even, try to add some IPs to make couples')
        # Same effect as exit(1), without relying on the ``site`` module
        # having injected ``exit`` into builtins.
        raise SystemExit(1)

    return [(lst[i], lst[i + 1]) for i in range(0, len(lst), 2)]
def dump(ip_tuples, bts):
    """Total the per-occurrence counts for each distinct IP tuple.

    ``ip_tuples`` is a sequence of hashable IP tuples and ``bts`` holds the
    count belonging to the tuple at the same index.

    Returns a dict whose keys are the distinct tuples and whose values are
    the sum of all counts recorded for that tuple.

    Raises IndexError if ``bts`` is shorter than ``ip_tuples``; surplus
    entries in ``bts`` are ignored.
    """
    totals = {}
    for position, ip_tuple in enumerate(ip_tuples):
        if ip_tuple in totals:
            totals[ip_tuple] += bts[position]
        else:
            # First occurrence: start the running total with its own count.
            totals[ip_tuple] = bts[position]
    return totals
def time_stamp(d, t):
    """Convert a date string and a time string into a Unix timestamp.

    ``d`` must match ``%d/%m/%Y`` and ``t`` must match ``%H:%M:%S``.

    Returns the timestamp as a float. The value is computed with
    ``time.mktime``, so the date/time is interpreted as local time.

    Raises ValueError (from ``strptime``) if either string does not match
    its expected format.
    """
    moment = datetime.datetime.strptime('%s %s' % (d, t), '%d/%m/%Y %H:%M:%S')
    return time.mktime(moment.timetuple())
def make_output(input_list):
    """Sort rows newest-first and merge rows that share the same IP tuple.

    Each item of ``input_list`` is a row of the form
    ``(date, time, ip_tuple, count)``. ``date`` and ``time`` are strings
    accepted by ``time_stamp``; ``ip_tuple`` must be hashable.

    Returns a list of rows (each a list) ordered by timestamp, newest
    first. Only the newest row is kept for every distinct ``ip_tuple``; the
    counts of the older rows with the same ``ip_tuple`` are added to it.
    """
    newest_first = sorted(
        input_list,
        reverse=True,
        key=lambda row: time_stamp(row[0], row[1]),
    )

    merged = []
    # Maps ip_tuple -> the row already placed in ``merged``, so duplicates
    # are found in constant time instead of rescanning the output list.
    row_for_ip = {}
    for row in newest_first:
        kept = row_for_ip.get(row[2])
        if kept is None:
            kept = list(row)  # copy as a list so the count can be updated
            row_for_ip[row[2]] = kept
            merged.append(kept)
        else:
            kept[3] += row[3]
    return merged
# Extract every field from the sample text.
# NOTE: ``ips`` is rebound here from the compiled pattern to its matches.
ips = ips.findall(text)
time_list = times.findall(text)
date_list = dates.findall(text)

# Consecutive IPs form one tuple per record.
ip_tuples = make_ip_tuples(ips)
ip_dump = dump(ip_tuples, bits)

# One row per record: (date, time, ip_tuple, count). Wrapped in list() so
# that ``inp`` is a reusable list on Python 3 as well as on Python 2.
inp = list(zip(date_list, time_list, ip_tuples, bits))
print(make_output(inp))
Advertisement
Add Comment
Please, Sign In to add comment