Advertisement
jack06215

[pandas] General pd.datetime tricks

Jun 13th, 2020
245
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.38 KB | None | 0 0
  1. !pip install jpholiday
  2. from jpholiday import year_holidays
  3. import pandas as pd
  4. import numpy as np
  5.  
  6. def random_datetimes_or_dates(start, end, out_format='datetime', n=10):
  7.     start_u = start.value//10**9
  8.     end_u = end.value//10**9
  9.  
  10.     return pd.DatetimeIndex((10**9*np.random.randint(start_u, end_u, n, dtype=np.int64)).view('M8[ns]'))
  11.  
  12. holidays = pd.to_datetime(np.array(year_holidays(2017))[:,0])
  13.  
  14. start = pd.to_datetime('2017-05-01')
  15. end = pd.to_datetime('2018-01-01')
  16.  
  17. df = pd.DataFrame({'date': random_datetimes_or_dates(start, end, out_format= 'datetime', n=1000).sort_values()})
  18. df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d', errors='ignore')
  19.  
  20.  
  21. # Convert to 4 seasons
  22. # print('seasons: ', [(month%12 + 3)//3 for month in range(1, 13)])
  23. # pandas built-in functions
  24. df['year'] = df['date'].dt.year
  25. df['month'] = df['date'].dt.month
  26. df['week'] = df['date'].dt.week
  27. df['day'] = df['date'].dt.day
  28. df['hour'] = df['date'].dt.hour
  29. df['minute'] = df['date'].dt.minute
  30. df['second'] = df['date'].dt.second
  31. df['dayofweek'] = df['date'].dt.dayofweek
  32. df['dayofweek_str'] = df['date'].dt.day_name()
  33.  
  34. # numerically convert date to season
  35. df['season'] = (df['date'].dt.month%12 + 3)//3
  36.  
  37. # japanese public holidays
  38. df['is_holidays'] = df['date'].isin(holidays).astype(int)
  39.  
  40. # Late night: 0 ~ 7, Morning: 7 ~ 12, Lunch: 12 ~ 13, Afternoon: 13 ~ 18 Evening: 18 ~ 24
  41. b = [0,6,11,12,17,24]
  42. l = [0, 1, 2, 3, 4]
  43. df['session'] = pd.cut(df['date'].dt.hour, bins=b, labels=l, include_lowest=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement