Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 23:00:00.100 10
- 23:00:01.200 8
- 23:00:01.600 0
- 23:00:06.300 4
- 23:00:01 NaN ( the first 100ms are missing )
- 23:00:02 5.2 ( 10*0.2 + 8*0.4 + 0*0.4 )
- 23:00:03 0
- 23:00:04 0
- 23:00:05 0
- 23:00:06 2.8 ( 0*0.3 + 4*0.7 )
- data.resample('S', fill_method='pad') # forming a series of seconds
- data = data.resample('L', fill_method='pad') # forming a series of milliseconds
- data.resample('S')
- import pandas as pa
- import numpy as np
- from datetime import datetime
- from datetime import timedelta
- time_stamps=[datetime(2013,04,11,23,00,00,100000),
- datetime(2013,04,11,23,00,1,200000),
- datetime(2013,04,11,23,00,1,600000),
- datetime(2013,04,11,23,00,6,300000)]
- values = [10, 8, 0, 4]
- raw = pa.TimeSeries(index=time_stamps, data=values)
- def round_down_to_second(dt):
- return datetime(year=dt.year, month=dt.month, day=dt.day,
- hour=dt.hour, minute=dt.minute, second=dt.second)
- def round_up_to_second(dt):
- return round_down_to_second(dt) + timedelta(seconds=1)
- def time_weighted_average(data):
- end = pa.DatetimeIndex([round_up_to_second(data.index[-1])])
- return np.average(data, weights=np.diff(data.index.append(end).asi8))
- start = round_down_to_second(time_stamps[0])
- end = round_down_to_second(time_stamps[-1])
- range = pa.date_range(start, end, freq='S')
- data = raw.reindex(raw.index + range)
- data = data.ffill()
- data = data.resample('S', how=time_weighted_average)
- tees = pd.Index(datetime(2000, 1, 1, 23, 0, n) for n in xrange(8))
- df2 = df1.reindex(df1.index + tees)
- df2['value'] = df2.value.ffill()
- In [14]: df2
- Out[14]:
- value
- 2000-01-01 23:00:00 NaN
- 2000-01-01 23:00:00.100000 10
- 2000-01-01 23:00:01 10
- 2000-01-01 23:00:01.200000 8
- 2000-01-01 23:00:01.600000 0
- 2000-01-01 23:00:02 0
- 2000-01-01 23:00:03 0
- 2000-01-01 23:00:04 0
- 2000-01-01 23:00:05 0
- 2000-01-01 23:00:06 0
- 2000-01-01 23:00:06.300000 4
- 2000-01-01 23:00:07 4
- df3['difference'] = df3['index'].shift(-1) - df3['index']
- df3['tot'] = df3.apply(lambda row: np.nan
- if row['difference'].seconds > 2 # a not very robust check for NaT
- else row['difference'].microseconds * row['value'] / 1000000,
- axis=1)
- In [17]: df3
- Out[17]:
- index value difference tot
- 0 2000-01-01 23:00:00 NaN 00:00:00.100000 NaN
- 1 2000-01-01 23:00:00.100000 10 00:00:00.900000 9.0
- 2 2000-01-01 23:00:01 10 00:00:00.200000 2.0
- 3 2000-01-01 23:00:01.200000 8 00:00:00.400000 3.2
- 4 2000-01-01 23:00:01.600000 0 00:00:00.400000 0.0
- 5 2000-01-01 23:00:02 0 00:00:01 0.0
- 6 2000-01-01 23:00:03 0 00:00:01 0.0
- 7 2000-01-01 23:00:04 0 00:00:01 0.0
- 8 2000-01-01 23:00:05 0 00:00:01 0.0
- 9 2000-01-01 23:00:06 0 00:00:00.300000 0.0
- 10 2000-01-01 23:00:06.300000 4 00:00:00.700000 2.8
- 11 2000-01-01 23:00:07 4 NaT NaN
- In [18]: df3.set_index('index')['tot'].resample('S', how='sum')
- Out[18]:
- index
- 2000-01-01 23:00:00 9.0
- 2000-01-01 23:00:01 5.2
- 2000-01-01 23:00:02 0.0
- 2000-01-01 23:00:03 0.0
- 2000-01-01 23:00:04 0.0
- 2000-01-01 23:00:05 0.0
- 2000-01-01 23:00:06 2.8
- 2000-01-01 23:00:07 NaN
- Freq: S, dtype: float64
- 23:00:06 2.8 ( 0*0.3 + 4*0.7 )
- from datetime import datetime
- import traces
- ts = traces.TimeSeries(data=[
- (datetime(2016, 9, 27, 23, 0, 0, 100000), 10),
- (datetime(2016, 9, 27, 23, 0, 1, 200000), 8),
- (datetime(2016, 9, 27, 23, 0, 1, 600000), 0),
- (datetime(2016, 9, 27, 23, 0, 6, 300000), 4),
- ])
- regularized = ts.moving_average(
- start=datetime(2016, 9, 27, 23, 0, 1),
- sampling_period=1,
- placement='left',
- )
- [(datetime(2016, 9, 27, 23, 0, 1), 5.2),
- (datetime(2016, 9, 27, 23, 0, 2), 0.0),
- (datetime(2016, 9, 27, 23, 0, 3), 0.0),
- (datetime(2016, 9, 27, 23, 0, 4), 0.0),
- (datetime(2016, 9, 27, 23, 0, 5), 0.0),
- (datetime(2016, 9, 27, 23, 0, 6), 2.8)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement