Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Build one row of aggregate features per `id` from the combined train/test
# frames, then split the result back into rows with and without a target.
#
# Reads `xtrain` and `xtest` (defined elsewhere in the file) and produces
# `all_data`, `x_train` and `x_test`.

# NOTE(review): `.iloc[:1000]` keeps only the first 1000 concatenated rows,
# which looks like a debugging limit -- confirm before running on full data.
all_data = (
    pd.concat([xtrain, xtest])
    .drop(['time', 'code', 'first_prch'], axis=1)
    .iloc[:1000]
)

# Per-id mean of the target. An id whose target values are all missing gets
# NaN here, which is what the split at the bottom keys on.
target = all_data.groupby('id')['target'].mean()

# Group the non-target columns once and reuse the grouping for every
# statistic instead of re-dropping and re-grouping for each one.
features = all_data.drop('target', axis=1)
grouped = features.groupby('id')
mean = grouped.mean()
std = grouped.std()
median = grouped.median()
max_ = grouped.max()
min_ = grouped.min()
sum_ = grouped.sum()
dif = max_ - min_  # per-id range of every feature

# Row with the largest `day_from_begin` for each id, indexed by id so it
# aligns with the aggregates above. `id` is also kept as a regular column.
last_activity = (
    all_data.sort_values('day_from_begin')
    .groupby('id')
    .tail(1)
    .set_index('id', drop=False)
    .sort_index()
)

all_data = pd.concat(
    [target, std, mean, median, max_, min_, sum_, dif, last_activity],
    axis=1,
)

# The concatenation repeats column names (each feature once per statistic,
# plus `target` again from `last_activity`). Keep the first occurrence as is
# and suffix later ones with '.1', '.2', ... so every column is addressable.
# `Index.get_duplicates()` no longer exists in pandas >= 1.0, so the
# occurrences are counted directly.
seen = {}
unique_names = []
for name in all_data.columns:
    occurrence = seen.get(name, 0)
    unique_names.append(name if occurrence == 0 else name + '.' + str(occurrence))
    seen[name] = occurrence + 1
cols = pd.Series(unique_names)
all_data.columns = cols

# `target` (un-suffixed) is the per-id mean computed above: ids with a known
# target go to x_train, ids without one go to x_test.
has_target = all_data['target'].notnull()
x_train = all_data[has_target]
x_test = all_data[~has_target]
Add Comment
Please, Sign In to add comment