- Grouping Messages by Time Intervals
def time_deltas(infile):
    """Pair "out" and "in" log messages and yield the delay between them.

    Each line of *infile* is split on whitespace. Tokens 2-4, joined by
    spaces, identify the message type:

    * ``"T out: [O]"`` -- token 8 is stored as the reference id, mapped to
      the line's timestamp (token 0).
    * ``"T in: [A]"`` -- token 7 is the reference id; if a matching "out"
      message was seen earlier, a result tuple is yielded.

    Args:
        infile: Path of the log file to read.

    Yields:
        ``(out_ts, ref_id, delta)`` where ``out_ts`` is the "out" timestamp
        as a float, ``ref_id`` is the id with its first and last character
        (the surrounding brackets in the sample data) removed, and ``delta``
        is ``in_ts * 1000 - out_ts * 1000``.

    "In" messages with no recorded "out" message are skipped instead of
    raising ``TypeError`` on ``float(None)``.
    """
    # NOTE(review): timestamps are subtracted as plain floats. If they are
    # clock values of the form HHMMSS.ffffff (as the sample log line
    # suggests), deltas spanning a minute boundary will be off -- confirm.
    pending = {}
    # Read the file that was passed in (not a module-level constant) and
    # make sure it is closed once the generator finishes or is closed.
    with open(infile, "r") as handle:
        for line in handle:
            fields = line.split()
            tag = " ".join(fields[2:5])
            if tag == "T out: [O]":
                pending[fields[8]] = fields[0]
            elif tag == "T in: [A]":
                in_ts, ref_id = fields[0], fields[7]
                out_ts = pending.pop(ref_id, None)
                if out_ts is None:
                    # No matching "out" message was recorded; nothing to pair.
                    continue
                yield (
                    float(out_ts),
                    ref_id[1:-1],
                    float(in_ts) * 1000 - float(out_ts) * 1000,
                )
import csv

# Log file to analyse.
INFILE = 'C:/Users/klee/Documents/test.txt'

# Dump every (out_ts, ref_id, delta) tuple produced by time_deltas() as one
# CSV row each.
# NOTE(review): the csv module documents opening the target with
# newline='' on Python 3 (or mode 'wb' on Python 2) to avoid doubled line
# endings on Windows -- confirm which interpreter this runs under.
with open('test.csv', 'w') as out_file:
    writer = csv.writer(out_file)
    writer.writerows(time_deltas(INFILE))
import datetime
import bisect
import collections

# (timestamp, message number) samples to be bucketed. Plain int literals
# replace the Python-2-only ``L`` suffix; the values are unchanged.
data = [
    (datetime.datetime(2010, 2, 26, 12, 8, 17), 5594813),
    (datetime.datetime(2010, 2, 26, 12, 7, 31), 5594810),
    (datetime.datetime(2010, 2, 26, 12, 6, 4), 5594807),
]

# Bin edges: ten points spaced 50 seconds apart, starting at ``start``.
interval = datetime.timedelta(seconds=50)
start = datetime.datetime(2010, 2, 26, 12, 6, 4)
grid = [start + n * interval for n in range(10)]

# bisect.bisect() returns the index of the first grid point strictly after
# ``date``, so each bin is keyed by the index of its upper edge.
bins = collections.defaultdict(list)
for date, num in data:
    idx = bisect.bisect(grid, date)
    bins[idx].append(num)

# Print "<upper edge> --- <count>" per bin, in chronological order rather
# than in dict order. ``items()`` works on both Python 2 and 3.
# NOTE: a date at or after grid[-1] gets idx == len(grid), which would make
# grid[idx] raise IndexError; extend ``grid`` if the data can run that far.
for idx, nums in sorted(bins.items()):
    print('{0} --- {1}'.format(grid[idx], len(nums)))
- 082438.577652 - T in: [A] accepted. ordID [F25Q6] timestamp [082438.575880] RefNumber [6018786] State [L]
from itertools import groupby
import csv

# Stream of (out_ts, ref_id, delta) tuples; the generator defined in this
# file is named time_deltas (not get_time_deltas).
data = time_deltas(INFILE)
get_key = lambda x: int(x[0])  # function to get group key from data

# groupby() only merges *consecutive* items with equal keys, so this relies
# on the tuples arriving ordered by timestamp.
bins = [(k, list(g)) for k, g in groupby(data, get_key)]

# print out the number of messages for each second
# (the loop variable is not called ``data`` so the stream is not clobbered)
for sec, msgs in bins:
    print('{0} --- {1}'.format(sec, len(msgs)))

# write (sec, msg_per_sec) out to CSV file
with open("test.csv", "w") as f:
    csv.writer(f).writerows((s, len(d)) for s, d in bins)

# get average message per second; 0.0 when there were no messages at all
message_counts = [len(d) for s, d in bins]
if message_counts:
    avg_msg_per_second = float(sum(message_counts)) / len(message_counts)
else:
    avg_msg_per_second = 0.0

# Alternative key for per-minute grouping: define it *before* the groupby()
# call above to use it.
# NOTE(review): dividing by 60 assumes x[0] is a number of seconds -- confirm
# against the timestamp format of the log.
get_key = lambda x: int(x[0] / 60)  # truncate timestamp to the minute
def map_time_to_interval_number(epoch, times, interval_seconds=50):
    """Yield ``(interval_number, t)`` for every ``t`` in *times*.

    The interval number is the count of whole *interval_seconds*-long
    intervals between *epoch* and ``t``, floored to an integer so that all
    times inside the same interval share one number (and can be used as a
    grouping key).

    Args:
        epoch: Reference point; ``t - epoch`` must produce a
            ``datetime.timedelta``.
        times: Iterable of values to classify.
        interval_seconds: Width of one interval in seconds (default 50,
            the previously hard-coded value).

    Yields:
        ``(interval_number, t)`` with ``interval_number`` an ``int``; it is
        negative for times before *epoch*.
    """
    for t in times:
        delta = t - epoch
        # Floor division keeps every time within one interval on the same
        # integer; a plain ``/`` would yield a distinct float per timestamp.
        interval = int(delta.total_seconds() // interval_seconds)
        yield interval, t
# Imported here because the file only has ``import collections``; the bare
# name ``defaultdict`` was otherwise undefined.
from collections import defaultdict

# Tally how many entries of ``data`` fall into each interval number,
# measured from the earliest entry.
# ``data`` is consumed twice (min(), then the loop), so it must be a
# re-iterable collection rather than a one-shot generator.
# NOTE(review): presumably ``data`` holds datetime values here -- confirm.
counts = defaultdict(int)
epoch = min(data)
for interval, _ in map_time_to_interval_number(epoch, data):
    counts[interval] += 1