Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd

# Prepare the Titanic passenger table for modelling: drop rows without an
# age, turn the categorical columns Sex and Embarked into indicator columns,
# and remove the columns that are not used afterwards.

# Import data
data = pd.read_csv('your_path_to_data_folder/titanic.csv')

# Remove rows with age missing. The explicit copy makes `data` an independent
# frame, so adding a column below cannot raise SettingWithCopyWarning.
data = data.dropna(subset=['Age']).copy()

# Create dummy for variable Sex (1 = female, 0 = anything else)
data['isFemale'] = (data['Sex'] == 'female').astype(int)

# Create dummies for variable Embarked, named Embarked_<value>. With
# dummy_na=False no extra indicator column is created for missing values.
# NOTE(review): the dtype of these columns depends on the installed pandas
# version, while isFemale is always an integer -- confirm downstream code
# does not rely on a specific dtype.
data = pd.concat(
    [
        data,
        pd.get_dummies(
            data['Embarked'],
            dummy_na=False,
            prefix='Embarked',
            prefix_sep='_',
        ),
    ],
    axis=1,
)

# Remove unused columns, including the raw Sex and Embarked columns that the
# indicator columns above replace.
data = data.drop(
    columns=['Name', 'Ticket', 'Cabin', 'PassengerId', 'Sex', 'Embarked']
)
Add Comment
Please, Sign In to add comment