Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
'''Cleaning the *Transcript* dataset'''
# Expand the 'event' column into indicator columns with Series.str.get_dummies.
transcript_event = transcript['event'].str.get_dummies()
# Prefix every indicator column with "event_" and turn spaces into underscores.
transcript_event.columns = [
    'event_' + name.replace(' ', '_') for name in transcript_event.columns
]
# standardize "offer id" column names
def transcript_value_clean(x_dict):
    """Return a copy of an event payload with 'offer id' renamed to 'offer_id'.

    The input mapping is left untouched: the rename is applied to a shallow
    copy, so the payload dicts held by the caller are not modified as a side
    effect.

    Args:
        x_dict: payload dict of a single transcript record.

    Returns:
        A new dict with the same items. If the key 'offer id' was present,
        its value is stored under 'offer_id' instead (overwriting any
        existing 'offer_id' entry) and the renamed key comes last.
    """
    cleaned = dict(x_dict)
    if 'offer id' in cleaned:
        # pop + re-insert keeps the resulting key order of a delete-then-add
        cleaned['offer_id'] = cleaned.pop('offer id')
    return cleaned
# Normalise every payload dict, then expand the payloads into one column per
# key. The frame is built on transcript's own index so that the axis=1 concat
# below lines rows up one-to-one even if `transcript` does not carry a default
# 0..n-1 index.
transcript_values = pd.DataFrame(
    [transcript_value_clean(payload) for payload in transcript['value']],
    index=transcript.index,
)

reward = transcript_values['reward']

# indicator flags: 1 where the payload carried the field, 0 where it is missing
transcript_values['is_reward'] = reward.notna().astype('int64')
transcript_values['is_amount'] = transcript_values['amount'].notna().astype('int64')

# merge amount and reward columns: take the reward when present, else the amount.
# Done column-wise; looking rows up with integer keys (x[0], x[1]) on a
# label-indexed Series is deprecated in recent pandas releases.
transcript_values['amount'] = reward.fillna(transcript_values['amount'])

# filling null offer ids with "0"
offer_id = transcript_values['offer_id']
transcript_values['has_offer'] = offer_id.notna().astype('int64')
transcript_values['offer_id'] = offer_id.fillna('0')

# Attach the expanded payload and event indicator columns, then drop the raw
# columns they replace ('reward' has been folded into 'amount' above).
transcript = pd.concat([transcript, transcript_values, transcript_event], axis=1)
transcript = transcript.drop(['value', 'event', 'reward'], axis=1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement