Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sample input: one row per observation, with the observation date per id.
df_raw_dates = pd.DataFrame(
    {
        "id": [102, 102, 102, 103, 103, 103, 104],
        "val": [9, 2, 4, 7, 6, 3, 2],
        "dates": [
            pd.Timestamp(2002, 1, 1),
            pd.Timestamp(2002, 3, 3),
            pd.Timestamp(2003, 4, 4),
            pd.Timestamp(2003, 8, 9),
            pd.Timestamp(2005, 2, 3),
            pd.Timestamp(2005, 2, 8),
            pd.Timestamp(2005, 2, 3),
        ],
    }
)
# Printed frame:
#     id  val      dates
# 0  102    9 2002-01-01
# 1  102    2 2002-03-03
# 2  102    4 2003-04-04
# 3  103    7 2003-08-09
# 4  103    6 2005-02-03
# 5  103    3 2005-02-08
# 6  104    2 2005-02-03
# Desired result: "dates" replaced by the day gap to the previous row of the
# same id, with 0 on the first row of every id.
df_processed_dates = pd.DataFrame(
    {
        "id": [102, 102, 102, 103, 103, 103, 104],
        "val": [9, 2, 4, 7, 6, 3, 2],
        "diff_dates": [0, 61, 397, 0, 544, 5, 0],
    }
)
# Printed frame:
#     id  val  diff_dates
# 0  102    9           0
# 1  102    2          61
# 2  102    4         397
# 3  103    7           0
# 4  103    6         544
# 5  103    3           5
# 6  104    2           0
# Day gap between consecutive dates within each id. The first row of every id
# has no predecessor, so diff() yields a missing value there; it is filled with
# 0 and the result is cast to int64 explicitly (the `downcast` keyword of
# fillna is deprecated in pandas 2.x).
df_raw_dates.groupby('id').dates.diff().dt.days.fillna(0).astype('int64')
# Output:
# 0      0
# 1     61
# 2    397
# 3      0
# 4    544
# 5      5
# 6      0
# Name: dates, dtype: int64
# Replace the "dates" column with the per-id day gap in one expression:
# pop() removes "dates" from the frame and returns it, the popped Series is
# grouped by the frame's "id" column, and the resulting gaps are stored as
# "date_diff". Missing gaps (first row of each id) become 0, and the cast to
# int64 is explicit because fillna's `downcast` keyword is deprecated in
# pandas 2.x.
df_raw_dates['date_diff'] = (
    df_raw_dates
    .pop('dates')
    .groupby(df_raw_dates['id'])
    .diff()
    .dt.days
    .fillna(0)
    .astype('int64')
)
df_raw_dates
# Output:
#     id  val  date_diff
# 0  102    9          0
# 1  102    2         61
# 2  102    4        397
# 3  103    7          0
# 4  103    6        544
# 5  103    3          5
# 6  104    2          0
Add Comment
Please, Sign In to add comment