Guest User

Untitled

a guest
Nov 19th, 2018
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.52 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. # Import data
  4. data = pd.read_csv('your_path_to_data_folder/titanic.csv')
  5.  
  6. # Remove rows with age missing
  7. data = data.query('Age == Age')
  8.  
  9. # Create dummy for variable Sex
  10. data['isFemale'] = (data['Sex'] == 'female') * 1
  11.  
  12. # Create dummies for variable Embarked
  13. data = pd.concat(
  14. [data,
  15. pd.get_dummies(data.loc[:,'Embarked'], dummy_na=False, prefix='Embarked', prefix_sep='_')],
  16. axis=1
  17. )
  18.  
  19. # Remove unused columns
  20. data = data.drop(['Name', 'Ticket', 'Cabin', 'PassengerId', 'Sex', 'Embarked'], axis = 1)
Add Comment
Please, Sign In to add comment