Guest User

Untitled

a guest
Apr 25th, 2018
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.55 KB | None | 0 0
  1. def prepare_train_set(PATH_TO_DATA, session_length=10):
  2. df = pd.read_csv(PATH_TO_DATA)
  3. df['site_ID'] = pd.factorize(df.site)[0]
  4. df['freq'] = df.groupby('site_ID', as_index=False)['site'].transform(lambda s: s.count())
  5. dictionary = df[['site', 'site_ID', 'freq']].loc[pd.unique(df['site_ID'])]
  6. dic = dictionary.set_index('site').T.to_dict('list')
  7. df_r = pd.pivot_table(df, values='freq', index='ID', columns='site', aggfunc=np.sum, fill_value=0)
  8. return df_r, dic
  9.  
  10. '''
  11.  
  12. '''
  13. for path in glob(os.path.join(PATH_TO_DATA,'*.csv')):
Add Comment
Please, Sign In to add comment