Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- !pip install jpholiday
- from jpholiday import year_holidays
- import pandas as pd
- import numpy as np
def random_datetimes_or_dates(start, end, out_format='datetime', n=10):
    """Draw ``n`` random timestamps uniformly from the half-open range [start, end).

    Args:
        start: Lower bound (inclusive); a ``pd.Timestamp`` (its ``.value``,
            nanoseconds since the epoch, is read).
        end: Upper bound (exclusive); a ``pd.Timestamp``.
        out_format: ``'date'`` draws whole days (every result is at
            midnight). Any other value, including the default
            ``'datetime'``, draws with one-second resolution.
        n: Number of timestamps to generate.

    Returns:
        An unsorted ``pd.DatetimeIndex`` of length ``n``.

    Raises:
        ValueError: Propagated from ``np.random.randint`` when the range
            contains no candidate value (e.g. ``start >= end``).
    """
    if out_format == 'date':
        ns_per_unit = 24 * 60 * 60 * 10**9
        # Round both bounds *up* to whole days so that a generated midnight
        # can never fall before `start` when `start` has a time-of-day part.
        low = -(-start.value // ns_per_unit)
        high = -(-end.value // ns_per_unit)
    else:
        ns_per_unit = 10**9
        low = start.value // ns_per_unit
        high = end.value // ns_per_unit

    # Draw integer ticks (days or seconds), scale back to nanoseconds and
    # reinterpret the int64 buffer as datetime64[ns].
    ticks = np.random.randint(low, high, n, dtype=np.int64)
    return pd.DatetimeIndex((ns_per_unit * ticks).view('M8[ns]'))
# Build a demo frame of 1000 random timestamps and derive calendar features.

# Holiday dates for 2017 as midnight timestamps. Only the first column of
# each entry is kept (presumably the date of a (date, name) pair - confirm
# against the jpholiday documentation).
holidays = pd.to_datetime(np.array(year_holidays(2017))[:,0])

# Sampling window; `end` is exclusive, so every sample falls in 2017 and the
# 2017 holiday list above covers the whole range.
start = pd.to_datetime('2017-05-01')
end = pd.to_datetime('2018-01-01')

# The generator already returns a DatetimeIndex, so the column is
# datetime64[ns] from the start and needs no further parsing.
df = pd.DataFrame({'date': random_datetimes_or_dates(start, end, out_format='datetime', n=1000).sort_values()})

# Convert to 4 seasons
# print('seasons: ', [(month%12 + 3)//3 for month in range(1, 13)])

# pandas built-in functions
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
# `Series.dt.week` was removed in pandas 2.0; isocalendar() is the supported
# replacement. Cast back to int to keep the plain integer dtype.
df['week'] = df['date'].dt.isocalendar().week.astype(int)
df['day'] = df['date'].dt.day
df['hour'] = df['date'].dt.hour
df['minute'] = df['date'].dt.minute
df['second'] = df['date'].dt.second
df['dayofweek'] = df['date'].dt.dayofweek
df['dayofweek_str'] = df['date'].dt.day_name()

# numerically convert date to season:
# Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4
df['season'] = (df['date'].dt.month%12 + 3)//3

# japanese public holidays
# The timestamps carry a time-of-day while `holidays` are midnight dates, so
# strip the time before the membership test; otherwise almost nothing matches.
df['is_holidays'] = df['date'].dt.normalize().isin(holidays).astype(int)

# Part of day by hour (right-closed bins, lowest edge included):
# 0: late night 0-6, 1: morning 7-11, 2: lunch 12, 3: afternoon 13-17,
# 4: evening 18-23
b = [0,6,11,12,17,24]
l = [0, 1, 2, 3, 4]
df['session'] = pd.cut(df['date'].dt.hour, bins=b, labels=l, include_lowest=True)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement