Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ### Titanic Python Exercise
#
# Workbook for submission to the Decoded Data Fellowship: predicts each
# passenger's survival outcome from sex, passenger class (Pclass) and age
# using a small hand-written rule set.

# In[68]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# In[69]:
# Read dataset (test.csv is resolved relative to the current working directory)
test = pd.read_csv("test.csv")

# In[70]:
# EDA | Cross-tabulate passenger counts by sex and class, including totals.
# Results are printed explicitly: a bare expression is only echoed inside a
# notebook cell and is silently discarded when this file runs as a script.
print(pd.crosstab(test.Sex, test.Pclass, normalize=False, margins=True))

# In[71]:
# Add the prediction column; everyone starts as "did not survive" (0)
test['ModelPrediction'] = 0

# In[72]:
# Prediction rules (1 = survived, 0 = did not survive):
#   * females survive, unless travelling in Pclass 3
#   * males under 18 survive when travelling in Pclass 1 or 2
#   * everyone else keeps the initial value 0
# Rows with a missing (NaN) Age compare as False against `< 18`, so males of
# unknown age are never promoted to "survived".
is_female = test['Sex'] == "female"
is_male = test['Sex'] == "male"
in_third_class = test['Pclass'] == 3
in_first_or_second_class = (test['Pclass'] == 1) | (test['Pclass'] == 2)
is_minor = test['Age'] < 18

predicted_survivor = (is_female & ~in_third_class) | (
    in_first_or_second_class & is_male & is_minor
)
test.loc[predicted_survivor, 'ModelPrediction'] = 1

# In[73]:
# Review results
print(test.head())

# In[74]:
# Share of passengers predicted to survive. The mean of a boolean Series is
# the fraction of True rows, i.e. survivors divided by the number of rows.
survival_rate = (test['ModelPrediction'] == 1).mean()
print("Predicted survival rate:", survival_rate)

# In[75]:
# Generate a stacked horizontal bar chart of predicted outcome, split by sex
outcome_counts = test.groupby(['ModelPrediction', 'Sex']).size().unstack()
outcome_counts.plot(kind='barh', stacked=True)
plt.title('Predicted survival outcome by gender')
plt.show()

# In[76]:
# Create a new data frame for the output. Explicit column selection raises
# KeyError if a column is missing, whereas DataFrame.filter would silently
# drop it and produce a malformed submission file.
submission = test[['PassengerId', 'ModelPrediction']]
print(submission.head())

# In[77]:
# Create the csv file output (index=False keeps the row index out of the file)
submission.to_csv('titanic_in_python_submission.csv', index=False)

# End of Notebook
Add Comment
Please, Sign In to add comment