Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Print survival frequency tables for a passenger training set.

The column names used here (PassengerId, Survived, Pclass, Sex, Age, SibSp,
Parch) look like the Kaggle Titanic training CSV -- confirm against your data.

Each ``*_table`` function returns a DataFrame, so a chart can be produced by
chaining e.g. ``.plot(kind='barh')`` on the result.
"""
import pandas as pd
import numpy as np

pd.options.display.max_columns = 15
pd.set_option('expand_frame_repr', False)

# Placeholder: replace with the location of the training CSV before running.
TRAIN_CSV_PATH = "path of your train data set"


def _count_by(frame, keys):
    """Return the number of non-null PassengerId values per group of *keys*."""
    return frame.groupby(keys)['PassengerId'].count()


def _percent_of(values, total):
    """Return *values* expressed as a percentage of *total* (vectorised)."""
    return 100.0 * values / total


def load_passengers(path):
    """Read the CSV at *path* and replace missing values with 0.

    ``Age`` is deliberately left unfilled: turning an unknown age into 0
    would silently count that passenger in the youngest age band.
    """
    raw = pd.read_csv(path)
    fill_values = {column: 0 for column in raw.columns if column != 'Age'}
    return raw.fillna(fill_values)


def pclass_survival_table(data):
    """Return frequency, percentage and running totals per (Pclass, Survived).

    Both percentage columns are relative to the total passenger count, so
    the last ``Cumulative_percentage`` entry is 100.
    """
    counts = _count_by(data, ['Pclass', 'Survived'])
    total = counts.sum()
    running = counts.cumsum()
    return pd.DataFrame({
        "Frequency": counts,
        "Percentage": _percent_of(counts, total),
        'Cumulative_Frequency': running,
        'Cumulative_percentage': _percent_of(running, total),
    })


def age_band_survival_table(data):
    """Return passenger counts per (Sex, Survived) for three age bands.

    The bands are age <= 25, 25 < age <= 50 and age > 50.  Rows whose Age
    is missing (NaN) fail every comparison and are therefore not counted in
    any band.  A (Sex, Survived) pair absent from a band shows up as NaN.
    """
    age = data['Age']
    bands = [
        ('for age<=25', age <= 25),
        ('for 25<age<=50', (age > 25) & (age <= 50)),
        ('for age>50', age > 50),
    ]
    per_band = {
        label: _count_by(data[mask], ['Sex', 'Survived'])
        for label, mask in bands
    }
    # Pass the labels explicitly so the columns stay in band order
    # regardless of how the installed pandas orders dict keys.
    return pd.DataFrame(per_band, columns=[label for label, _ in bands])


def relatives_survival_table(data):
    """Return count, running total and percentage for SibSp and for Parch.

    Rows are keyed by (number of relatives, Survived).  The SibSp and Parch
    counts are aligned on that key; a key present for only one of the two
    gets 0 in the other's columns.  Each ``*_freq_dist`` column is
    normalised by its own series total.
    """
    sibsp = _count_by(data, ['SibSp', 'Survived'])
    parch = _count_by(data, ['Parch', 'Survived'])
    return pd.DataFrame({
        'SibSp': sibsp,
        'Parch': parch,
        'SibSp_cumulative': sibsp.cumsum(),
        'SibSp_freq_dist': _percent_of(sibsp, sibsp.sum()),
        'Parch_cumulative': parch.cumsum(),
        'Parch_freq_dist': _percent_of(parch, parch.sum()),
    }).fillna(0)


def main(path=TRAIN_CSV_PATH):
    """Load the training data from *path* and print the three tables."""
    data = load_passengers(path)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(pclass_survival_table(data))
    print(age_band_survival_table(data))
    print(relatives_survival_table(data))


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment