Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def prepare_train_set(PATH_TO_DATA, session_length=10):
    """Load a visit log and build a per-ID site table plus a site lookup.

    Args:
        PATH_TO_DATA: Anything ``pandas.read_csv`` accepts (a path or a
            file-like object). The CSV must contain ``ID`` and ``site``
            columns.
        session_length: Accepted for backward compatibility; this function
            does not use it.

    Returns:
        A ``(table, site_info)`` tuple.

        ``table`` is a pivot table indexed by ``ID`` with one column per
        site. Each cell holds the sum of the ``freq`` values of the rows
        with that ``ID`` and site, or 0 when there are none.

        ``site_info`` maps every distinct site to ``[site_ID, freq]``, where
        ``site_ID`` is the integer code assigned in order of first
        appearance and ``freq`` is the number of rows in the file that
        mention the site.
    """
    df = pd.read_csv(PATH_TO_DATA)

    # Integer code per site, numbered in order of first appearance.
    df['site_ID'] = pd.factorize(df['site'])[0]
    # Row count of each site over the whole file, broadcast back onto rows.
    df['freq'] = df.groupby('site_ID')['site'].transform('count')

    # One row per distinct site. Selecting rows by the factorized codes
    # (``.loc[unique site_IDs]``) would instead pick the first k rows of the
    # frame, duplicating some sites and dropping others from the lookup.
    site_rows = df[['site', 'site_ID', 'freq']].drop_duplicates('site')
    site_info = site_rows.set_index('site').T.to_dict('list')

    # NOTE(review): each cell sums the file-wide ``freq`` once per matching
    # row, i.e. (rows for this ID/site) * (file-wide count of the site).
    # Kept as in the original; confirm this is the intended feature.
    table = pd.pivot_table(
        df,
        values='freq',
        index='ID',
        columns='site',
        aggfunc='sum',
        fill_value=0,
    )
    return table, site_info
- '''
- '''
- for path in glob(os.path.join(PATH_TO_DATA,'*.csv')):
Add Comment
Please, Sign In to add comment