Guest User

Untitled

a guest
Nov 18th, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.51 KB | None | 0 0
  1. # ### Titanic Python Exercise
  2. #
  3. # Workbook for submission to Decoded Data Fellowship to predict survival outcome based on gender and Pclass (plus an age rule for males)
  4.  
  5. # In[68]:
  6.  
  7.  
  8. # Import libraries
  9. import pandas as pd
  10. import numpy as np
  11. import matplotlib.pyplot as plt
  12.  
  13.  
  14. # In[69]:
  15.  
  16.  
  17. # Read dataset
  18. test = pd.read_csv("test.csv")
  19.  
  20.  
  21. # In[70]:
  22.  
  23.  
  24. # EDA | Generate crosstab to review the dataset
  25. pd.crosstab(test.Sex, test.Pclass, normalize=False, margins=True)
  26.  
  27.  
  28. # In[71]:
  29.  
  30.  
  31. # add a prediction column and set its initial value to 0 (predicted not to survive)
  32. test['ModelPrediction'] = 0
  33.  
  34.  
  35. # In[72]:
  36.  
  37.  
  38. # assign predictions; order matters because later rules overwrite earlier ones: females = survived (1), then everyone in Pclass 3 = not survived (0), then males under 18 in Pclass 1 or 2 = survived (1)
  39. test.loc[test['Sex'] == "female", 'ModelPrediction'] = 1
  40. test.loc[test['Pclass'] == 3, 'ModelPrediction'] = 0
  41. test.loc[((test['Pclass'] == 1) | (test['Pclass'] == 2)) & (test['Sex'] == "male") & (test.Age < 18), 'ModelPrediction'] = 1
  42.  
  43.  
  44. # In[73]:
  45.  
  46.  
  47. # review results
  48. test.head()
  49.  
  50.  
  51. # In[74]:
  52.  
  53.  
  54. # calculate the share of passengers predicted to survive (rows with ModelPrediction == 1 divided by total rows)
  55. sum(test.ModelPrediction == 1) / test.shape[0] # shape[0] is the number of rows in the dataset
  56.  
  57.  
  58. # In[75]:
  59.  
  60.  
  61. # generate bar chart showing predicted survival outcome
  62. test.groupby(['ModelPrediction', 'Sex']).size().unstack().plot(kind='barh', stacked=True)
  63. plt.title('Predicted survival outcome by gender')
  64. plt.show()
  65.  
  66.  
  67. # In[76]:
  68.  
  69.  
  70. # Create a new data frame for the output
  71. submission = test.filter(['PassengerId', 'ModelPrediction'])
  72. submission.head()
  73.  
  74.  
  75. # In[77]:
  76.  
  77.  
  78. # Create the csv file output
  79. submission.to_csv('titanic_in_python_submission.csv', index=False)
  80.  
  81.  
  82. # End of Notebook
Add Comment
Please, Sign In to add comment