Guest User

Untitled

a guest
Mar 19th, 2018
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.04 KB | None | 0 0
  1. all_data = pd.concat([xtrain , xtest]).drop(['time', 'code', 'first_prch'], axis=1).iloc[:1000]
  2.  
  3. target = all_data.groupby('id').mean()['target']
  4.  
  5. mean = all_data.drop('target' , axis=1).groupby('id').mean()
  6. std = all_data.drop('target' , axis=1).groupby('id').std()
  7. median = all_data.drop('target' , axis=1).groupby('id').median()
  8. max_ = all_data.drop('target' , axis=1).groupby('id').max()
  9. min_ = all_data.drop('target' , axis=1).groupby('id').min()
  10. sum_ = all_data.drop('target' , axis=1).groupby('id').sum()
  11. dif = max_ - min_
  12. last_activity = all_data.sort_values('day_from_begin').groupby(['id']).apply(lambda xf: xf.iloc[-1])
  13.  
  14. all_data = pd.concat([target, std, mean , median , max_ , min_, sum_, dif, last_activity], axis=1)
  15.  
  16. cols=pd.Series(all_data.columns)
  17. for dup in all_data.columns.get_duplicates(): cols[all_data.columns.get_loc(dup)]=[dup+'.'+str(d_idx) if d_idx!=0 else dup for d_idx in range(all_data.columns.get_loc(dup).sum())]
  18. all_data.columns=cols
  19.  
  20. x_train = all_data[all_data['target'].notnull()]
  21. x_test = all_data[all_data['target'].isnull()]
Add Comment
Please, Sign In to add comment