deathcoder007

Titanic:disaster_1

Sep 4th, 2016
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.30 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. pd.options.display.max_columns=15
  4. pd.set_option('expand_frame_repr', False)
  5. data=pd.read_csv("path of your train data set")
  6. data=data.fillna(0)
  7.  
  8.  
  9. Pclass=data.groupby(['Pclass','Survived'])['PassengerId'].count()
  10. print pd.DataFrame({"Frequency":Pclass,"Percentage":Pclass.apply(lambda xx: 100*xx/float(Pclass.sum())),'Cumulative_Frequency':Pclass.cumsum(),'Cumulative_percentage':Pclass.cumsum().apply(lambda xx: 100*xx/float(Pclass.sum()))})#.value_counts(sort=False,normalize=True)#.plot(kind='barh')
  11.  
  12.  
  13. x = data[(data['Age']<=25)]
  14. y = pd.merge(data[(data['Age']<=50)],data[(data['Age']>25)],how='inner',on='PassengerId')
  15. z = data[(data['Age']>50)]
  16. x=x.groupby(['Sex','Survived'])['PassengerId'].count()
  17. y=y.groupby(['Sex_x','Survived_x'])['PassengerId'].count()
  18. z=z.groupby(['Sex','Survived'])['PassengerId'].count()
  19. print pd.DataFrame({'for age<=25':x,'for 25<age<=50':y,'for age>=50':z})#.plot(kind='barh')
  20.  
  21.  
  22. x=data.groupby(['SibSp','Survived'])['PassengerId'].count()
  23. y=data.groupby(['Parch','Survived'])['PassengerId'].count()
  24. z=pd.DataFrame({'SibSp':x,'Parch':y,'SibSp_cumulative':x.cumsum()\
  25. ,'SibSp_freq_dist':x.apply(lambda xx: 100*xx/float(x.sum()))\
  26. ,'Parch_cumulative':y.cumsum(),'Parch_freq_dist':y.apply(lambda xx: 100*xx/float(x.sum()))}).fillna(0)
  27. print z
Add Comment
Please, Sign In to add comment