Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on May 8th, 2012  |  syntax: None  |  size: 2.25 KB  |  hits: 9  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. Grouping Messages by Time Intervals
  2. def time_deltas(infile):
  3. entries = (line.split() for line in open(INFILE, "r"))
  4. ts = {}
  5. for e in entries:
  6.     if " ".join(e[2:5]) == "T out: [O]":
  7.         ts[e[8]] = e[0]    
  8.     elif " ".join(e[2:5]) == "T in: [A]":    
  9.         in_ts, ref_id = e[0], e[7]
  10.         out_ts = ts.pop(ref_id, None)
  11.         yield (float(out_ts),ref_id[1:-1],(float(in_ts)*1000 - float(out_ts)*1000))
  12.  
  13. INFILE = 'C:/Users/klee/Documents/test.txt'
  14. import csv
  15.  
  16. with open('test.csv', 'w') as f:
  17. csv.writer(f).writerows(time_deltas(INFILE))
  18.        
  19. import datetime
  20. import bisect
  21. import collections
  22.  
  23. data=[ (datetime.datetime(2010, 2, 26, 12, 8, 17), 5594813L),
  24.   (datetime.datetime(2010, 2, 26, 12, 7, 31), 5594810L),
  25.   (datetime.datetime(2010, 2, 26, 12, 6, 4) , 5594807L),
  26. ]
  27. interval=datetime.timedelta(seconds=50)
  28. start=datetime.datetime(2010, 2, 26, 12, 6, 4)
  29. grid=[start+n*interval for n in range(10)]
  30. bins=collections.defaultdict(list)
  31. for date,num in data:
  32. idx=bisect.bisect(grid,date)
  33.    bins[idx].append(num)
  34. for idx,nums in bins.iteritems():
  35. print('{0} --- {1}'.format(grid[idx],len(nums)))
  36.        
  37. 082438.577652 - T in: [A] accepted. ordID [F25Q6] timestamp [082438.575880] RefNumber [6018786] State [L]
  38.        
  39. from itertools import groupby
  40.  
  41. data = get_time_deltas(INFILE)  
  42. get_key = lambda x: int(x[0])  # function to get group key from data
  43. bins = [(k, list(g)) for k, g in groupby(data, get_key)]
  44.        
  45. # print out the number of messages for each second
  46. for sec, data in bins:
  47.     print('{0} --- {1}'.format(sec, len(data)))
  48.  
  49. # write (sec, msg_per_sec) out to CSV file
  50. import csv
  51. with open("test.csv", "w") as f:
  52.     csv.writer(f).writerows((s, len(d)) for s, d in bins)
  53.  
  54. # get average message per second
  55. message_counts = [len(d) for s, d in bins]
  56. avg_msg_per_second = float(sum(message_count)) / len(message_count)
  57.        
  58. get_key = lambda x: int(x[0] / 60)  # truncate timestamp to the minute
  59.        
  60. def map_time_to_interval_number( epoch, times )
  61.     for t in times:
  62.         delta= (t - epoch)
  63.         delta_t= delta.days*60*60*24 + delta.seconds + delta.microseconds/1000000.0
  64.         interval = delta_t / 50
  65.         yield interval, t
  66.  
  67. counts = defaultdict( int )
  68. epoch = min( data )
  69. for interval, time in map_time_to_interval_number( epoch, data ):
  70.     counts[interval] += 1