Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.28 KB | None | 0 0
  1. '''Cleaning the *Transcript* dataset'''
  2. transcript_event = transcript['event'].str.get_dummies();
  3. transcript_event.columns = ['event_' + '_'.join(col.split(' ')) for col in transcript_event.columns];
  4.  
  5. # standardize "offer id" column names
  6. def transcript_value_clean(x_dict):
  7. if 'offer id' in x_dict:
  8. x_dict['offer_id'] = x_dict['offer id'];
  9. del x_dict['offer id'];
  10. return x_dict;
  11.  
  12. transcript_values = transcript['value'].apply(lambda x: transcript_value_clean(x));
  13. transcript_values = pd.DataFrame(list(transcript_values.values));
  14. transcript_values['is_reward'] = transcript_values['reward'].apply(lambda x: int(not np.isnan(x)));
  15.  
  16. # merge amount and reward columns
  17. transcript_values['is_amount'] = transcript_values['amount'].apply(lambda x: int(not np.isnan(x)));
  18. transcript_values['amount'] = transcript_values[['amount', 'reward']].apply(lambda x: x[0] if np.isnan(x[1]) else x[1], axis=1)
  19.  
  20. # filling null offer ids with "0"
  21. transcript_values['has_offer'] = transcript_values['offer_id'].apply(lambda x: int(not pd.isna(x)));
  22. transcript_values['offer_id'] = transcript_values['offer_id'].apply(lambda x: '0' if pd.isna(x) else x);
  23.  
  24. transcript = pd.concat([transcript, transcript_values, transcript_event], axis=1);
  25. transcript = transcript.drop(['value', 'event', 'reward'], axis=1);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement