Advertisement
Guest User

Untitled

a guest
Sep 27th, 2016
51
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.14 KB | None | 0 0
  1. 23:00:00.100 10
  2. 23:00:01.200 8
  3. 23:00:01.600 0
  4. 23:00:06.300 4
  5.  
  6. 23:00:01 NaN ( the first 100ms are missing )
  7. 23:00:02 5.2 ( 10*0.2 + 8*0.4 + 0*0.4 )
  8. 23:00:03 0
  9. 23:00:04 0
  10. 23:00:05 0
  11. 23:00:06 2.8 ( 0*0.3 + 4*0.7 )
  12.  
  13. data.resample('S', fill_method='pad') # forming a series of seconds
  14.  
  15. data = data.resample('L', fill_method='pad') # forming a series of milliseconds
  16. data.resample('S')
  17.  
  18. import pandas as pa
  19. import numpy as np
  20. from datetime import datetime
  21. from datetime import timedelta
  22.  
  23. time_stamps=[datetime(2013,04,11,23,00,00,100000),
  24. datetime(2013,04,11,23,00,1,200000),
  25. datetime(2013,04,11,23,00,1,600000),
  26. datetime(2013,04,11,23,00,6,300000)]
  27. values = [10, 8, 0, 4]
  28. raw = pa.TimeSeries(index=time_stamps, data=values)
  29.  
  30. def round_down_to_second(dt):
  31. return datetime(year=dt.year, month=dt.month, day=dt.day,
  32. hour=dt.hour, minute=dt.minute, second=dt.second)
  33.  
  34. def round_up_to_second(dt):
  35. return round_down_to_second(dt) + timedelta(seconds=1)
  36.  
  37. def time_weighted_average(data):
  38. end = pa.DatetimeIndex([round_up_to_second(data.index[-1])])
  39. return np.average(data, weights=np.diff(data.index.append(end).asi8))
  40.  
  41. start = round_down_to_second(time_stamps[0])
  42. end = round_down_to_second(time_stamps[-1])
  43. range = pa.date_range(start, end, freq='S')
  44. data = raw.reindex(raw.index + range)
  45. data = data.ffill()
  46.  
  47. data = data.resample('S', how=time_weighted_average)
  48.  
  49. tees = pd.Index(datetime(2000, 1, 1, 23, 0, n) for n in xrange(8))
  50. df2 = df1.reindex(df1.index + tees)
  51. df2['value'] = df2.value.ffill()
  52.  
  53. In [14]: df2
  54. Out[14]:
  55. value
  56. 2000-01-01 23:00:00 NaN
  57. 2000-01-01 23:00:00.100000 10
  58. 2000-01-01 23:00:01 10
  59. 2000-01-01 23:00:01.200000 8
  60. 2000-01-01 23:00:01.600000 0
  61. 2000-01-01 23:00:02 0
  62. 2000-01-01 23:00:03 0
  63. 2000-01-01 23:00:04 0
  64. 2000-01-01 23:00:05 0
  65. 2000-01-01 23:00:06 0
  66. 2000-01-01 23:00:06.300000 4
  67. 2000-01-01 23:00:07 4
  68.  
  69. df3['difference'] = df3['index'].shift(-1) - df3['index']
  70. df3['tot'] = df3.apply(lambda row: np.nan
  71. if row['difference'].seconds > 2 # a not very robust check for NaT
  72. else row['difference'].microseconds * row['value'] / 1000000,
  73. axis=1)
  74.  
  75. In [17]: df3
  76. Out[17]:
  77. index value difference tot
  78. 0 2000-01-01 23:00:00 NaN 00:00:00.100000 NaN
  79. 1 2000-01-01 23:00:00.100000 10 00:00:00.900000 9.0
  80. 2 2000-01-01 23:00:01 10 00:00:00.200000 2.0
  81. 3 2000-01-01 23:00:01.200000 8 00:00:00.400000 3.2
  82. 4 2000-01-01 23:00:01.600000 0 00:00:00.400000 0.0
  83. 5 2000-01-01 23:00:02 0 00:00:01 0.0
  84. 6 2000-01-01 23:00:03 0 00:00:01 0.0
  85. 7 2000-01-01 23:00:04 0 00:00:01 0.0
  86. 8 2000-01-01 23:00:05 0 00:00:01 0.0
  87. 9 2000-01-01 23:00:06 0 00:00:00.300000 0.0
  88. 10 2000-01-01 23:00:06.300000 4 00:00:00.700000 2.8
  89. 11 2000-01-01 23:00:07 4 NaT NaN
  90.  
  91. In [18]: df3.set_index('index')['tot'].resample('S', how='sum')
  92. Out[18]:
  93. index
  94. 2000-01-01 23:00:00 9.0
  95. 2000-01-01 23:00:01 5.2
  96. 2000-01-01 23:00:02 0.0
  97. 2000-01-01 23:00:03 0.0
  98. 2000-01-01 23:00:04 0.0
  99. 2000-01-01 23:00:05 0.0
  100. 2000-01-01 23:00:06 2.8
  101. 2000-01-01 23:00:07 NaN
  102. Freq: S, dtype: float64
  103.  
  104. 23:00:06 2.8 ( 0*0.3 + 4*0.7 )
  105.  
  106. from datetime import datetime
  107. import traces
  108.  
  109. ts = traces.TimeSeries(data=[
  110. (datetime(2016, 9, 27, 23, 0, 0, 100000), 10),
  111. (datetime(2016, 9, 27, 23, 0, 1, 200000), 8),
  112. (datetime(2016, 9, 27, 23, 0, 1, 600000), 0),
  113. (datetime(2016, 9, 27, 23, 0, 6, 300000), 4),
  114. ])
  115.  
  116. regularized = ts.moving_average(
  117. start=datetime(2016, 9, 27, 23, 0, 1),
  118. sampling_period=1,
  119. placement='left',
  120. )
  121.  
  122. [(datetime(2016, 9, 27, 23, 0, 1), 5.2),
  123. (datetime(2016, 9, 27, 23, 0, 2), 0.0),
  124. (datetime(2016, 9, 27, 23, 0, 3), 0.0),
  125. (datetime(2016, 9, 27, 23, 0, 4), 0.0),
  126. (datetime(2016, 9, 27, 23, 0, 5), 0.0),
  127. (datetime(2016, 9, 27, 23, 0, 6), 2.8)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement