Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Print exploratory statistics for the Titanic ``train.csv`` and ``test.csv`` files.

Run as a script from the directory that contains both CSV files.  Everything
is written to standard output in a fixed order: dtypes and per-column null
flags, numeric summaries, categorical summaries for the training data, and
finally the same categorical summaries for the train + test concatenation.
"""

import pandas as pd

# Columns whose "contains any null?" flag is printed for the training frame,
# in output order.
_TRAIN_NULL_COLUMNS = (
    "PassengerId", "Survived", "Pclass", "Name", "Sex", "Age",
    "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked",
)

# Columns checked on the test frame.  The "Pclass" check was disabled in the
# original script and stays disabled so the printed output is unchanged.
_TEST_NULL_COLUMNS = (
    "PassengerId", "Name", "Sex", "Age",
    "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked",
)

_NUMERIC_COLUMNS = ("Age", "SibSp", "Parch", "Fare")
_CATEGORICAL_COLUMNS = ("Survived", "Pclass", "Sex", "Embarked")
_QUANTILES = (.25, .50, .75)


def _print_spacers(count):
    """Print ``"\\n"`` ``count`` times (each call emits two line breaks)."""
    for _ in range(count):
        print("\n")


def _print_null_flags(frame, columns):
    """Print, one per line, whether each of ``columns`` holds any null value."""
    for column in columns:
        print(frame[column].isnull().values.any())


def _print_numeric_summary(frame, columns):
    """Print count, mean, std, min, max and the three quartiles per column."""
    for column in columns:
        series = frame[column]
        print(series.count())
        print(series.mean())
        print(series.std())
        print(series.min())
        print(series.max())
        for quantile in _QUANTILES:
            print(series.quantile(quantile))


def _print_categorical_summary(frame, columns, spaced=False):
    """Print non-null count, number of unique values and most frequent value.

    When ``spaced`` is true, a ``"\\n"`` is printed between consecutive
    columns (not before the first one and not after the last one).
    """
    for position, column in enumerate(columns):
        if spaced and position:
            print("\n")
        series = frame[column]
        print(series.count())
        print(series.nunique())  # number of unique values
        print(series.value_counts().idxmax())  # most frequent value


def main():
    """Load both CSV files from the working directory and print the report."""
    pd.set_option('display.max_columns', 15)
    train = pd.read_csv("train.csv")
    test = pd.read_csv("test.csv")

    print(train.dtypes)  # data type of every feature
    _print_null_flags(train, _TRAIN_NULL_COLUMNS)
    _print_spacers(4)
    _print_null_flags(test, _TEST_NULL_COLUMNS)
    _print_spacers(2)

    print(train.describe())
    _print_numeric_summary(train, _NUMERIC_COLUMNS)
    _print_categorical_summary(train, _CATEGORICAL_COLUMNS)

    # The training frame is concatenated as-is, "Survived" included.  The
    # original script called ``data.drop(["Survived"], axis=1)`` here but
    # threw the result away, so the column was never removed -- and the
    # statistics printed below read ``Survived`` from the combined frame.
    combined = pd.concat([train, test])
    print(combined.describe())
    _print_categorical_summary(combined, _CATEGORICAL_COLUMNS, spaced=True)
    for column in ("Survived", "Embarked", "Pclass", "Sex"):
        print(combined[column].describe())


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment