Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#! /usr/bin/env python3
"""Count vacation days per 28-day reporting period using pandas resampling.

The script builds a weekly series, aggregates it into 28-day periods, and then
counts how many vacation dates fall inside each of those same periods.  The
key point is that the vacation-day bins must be anchored to the first period
start of ``df`` rather than to the first vacation date.
"""
import pandas as pd
import numpy as np
import datetime

# Twelve weekly (Tuesday) timestamps starting on 2012-11-06.
dr = pd.date_range('11/6/2012', periods=12, freq='W-TUE')
df = pd.DataFrame(np.random.randn(len(dr)), index=dr, columns=['column1'])

# The `how=` keyword is no longer accepted by resample(); call the
# aggregation method on the resampler instead.
df = df.resample('28D').mean()

# The start dates for each period (column1 values are random per run)
df
#             column1
# 2012-11-06      ...
# 2012-12-04      ...
# 2013-01-01      ...

# Vacation dates: twelve consecutive days, 2012-12-01 through 2012-12-12.
vaca_days = list(pd.date_range(start='2012-12-1', periods=12, freq='D'))
vaca_days

# Clearly there should be 3 vacation dates in the 2012-11-06 period
# and 9 vacation dates in the 2012-12-04 period
# ---------------------------
# Resampling the vacation days on their own gives the wrong numbers, because
# by default the bins start on vaca_days[0] instead of on df's period start.
pd.Series(1, vaca_days).resample('28D').sum()
# 2012-12-01    12

# No help here: shift() moves the values, not the bin edges.
pd.Series(1, vaca_days).resample('28D').sum().shift(-1)

# Anchor the 28-day bins to the first period start of `df` with `origin=`
# (available in pandas >= 1.1).  This replaces the old workaround of appending
# a fake zero-valued entry at 2012-11-06, so `vaca_days` is left untouched.
vaca_series = pd.Series(1, index=vaca_days).resample(
    '28D', origin=df.index[0]
).sum()
vaca_series
# 2012-11-06    3
# 2012-12-04    9

# Align the counts to df's periods; periods without any vacation day get 0.
# reindex(fill_value=0) keeps the counts as integers, whereas assigning and
# then calling fillna(0) would leave them as floats.
df['Vacation Days'] = vaca_series.reindex(df.index, fill_value=0)
df
#             column1  Vacation Days
# 2012-11-06      ...              3
# 2012-12-04      ...              9
# 2013-01-01      ...              0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement