Moinak

Assignment_1_Machine_Learning

Jan 24th, 2020
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.33 KB | None | 0 0
  1. import pandas as pd
  2. pd.set_option('display.max_columns', 15)
  3. data = pd.read_csv("train.csv")
  4. data_test=pd.read_csv("test.csv")
  5. print(data.dtypes) # Finding datatypes for all feautures
  6.  
  7.  
  8. print(data["PassengerId"].isnull().values.any()) # Null Value is there or not
  9. print(data["Survived"].isnull().values.any())
  10. print(data["Pclass"].isnull().values.any())
  11. print(data["Name"].isnull().values.any())
  12. print(data["Sex"].isnull().values.any())
  13. print(data["Age"].isnull().values.any())
  14. print(data["SibSp"].isnull().values.any())
  15. print(data["Parch"].isnull().values.any())
  16. print(data["Ticket"].isnull().values.any())
  17. print(data["Fare"].isnull().values.any())
  18. print(data["Cabin"].isnull().values.any())
  19. print(data["Embarked"].isnull().values.any())
  20.  
  21. print("\n")
  22. print("\n")
  23. print("\n")
  24. print("\n")
  25.  
  26.  
  27. print(data_test["PassengerId"].isnull().values.any()) # Null Value is there or not
  28. #print(data_test["Pclass"].isnull().values.any())
  29. print(data_test["Name"].isnull().values.any())
  30. print(data_test["Sex"].isnull().values.any())
  31. print(data_test["Age"].isnull().values.any())
  32. print(data_test["SibSp"].isnull().values.any())
  33. print(data_test["Parch"].isnull().values.any())
  34. print(data_test["Ticket"].isnull().values.any())
  35. print(data_test["Fare"].isnull().values.any())
  36. print(data_test["Cabin"].isnull().values.any())
  37. print(data_test["Embarked"].isnull().values.any())
  38.  
  39. print("\n")
  40.  
  41. print("\n")
  42.  
  43. print(data.describe())
  44.  
  45.  
  46. print(data["Age"].count())
  47. print(data["Age"].mean())
  48. print(data["Age"].std())
  49. print(data["Age"].min())
  50. print(data["Age"].max())
  51. print(data["Age"].quantile(.25))
  52. print(data["Age"].quantile(.50))
  53. print(data["Age"].quantile(.75))
  54.  
  55.  
  56. print(data["SibSp"].count())
  57. print(data["SibSp"].mean())
  58. print(data["SibSp"].std())
  59. print(data["SibSp"].min())
  60. print(data["SibSp"].max())
  61. print(data["SibSp"].quantile(.25))
  62. print(data["SibSp"].quantile(.50))
  63. print(data["SibSp"].quantile(.75))
  64.  
  65.  
  66. print(data["Parch"].count())
  67. print(data["Parch"].mean())
  68. print(data["Parch"].std())
  69. print(data["Parch"].min())
  70. print(data["Parch"].max())
  71. print(data["Parch"].quantile(.25))
  72. print(data["Parch"].quantile(.50))
  73. print(data["Parch"].quantile(.75))
  74.  
  75.  
  76. print(data["Fare"].count())
  77. print(data["Fare"].mean())
  78. print(data["Fare"].std())
  79. print(data["Fare"].min())
  80. print(data["Fare"].max())
  81. print(data["Fare"].quantile(.25))
  82. print(data["Fare"].quantile(.50))
  83. print(data["Fare"].quantile(.75))
  84.  
  85.  
  86.  
  87.  
  88. #print(data['Fare'].count)
  89. #print(data.isnull().sum())
  90.  
  91. #filter = data["Name"]=="Braund, Mr. Owen Harris"
  92. #print(data.where(filter))
  93.  
  94. #print(data[(data["Age"]==54) & (data["Sex"]=="male")].head(15))
  95.  
  96.  
  97.  
  98.  
  99. print(data["Survived"].count())
  100. print(data["Survived"].nunique()) # Number of unique values
  101. print(data["Survived"].value_counts().idxmax()) #Most frequent value
  102.  
  103. #print(data["Survived"].value_counts()) #Most frequent value
  104.  
  105. print(data["Pclass"].count())
  106. print(data["Pclass"].nunique()) # Number of unique values
  107. print(data["Pclass"].value_counts().idxmax()) #Most frequent value
  108.  
  109.  
  110. print(data["Sex"].count())
  111. print(data["Sex"].nunique()) # Number of unique values
  112. print(data["Sex"].value_counts().idxmax()) #Most frequent value
  113.  
  114. print(data["Embarked"].count())
  115. print(data["Embarked"].nunique()) # Number of unique values
  116. print(data["Embarked"].value_counts().idxmax()) #Most frequent value
  117.  
  118.  
  119.  
  120. data.drop(["Survived"], axis=1)
  121. combine=pd.concat([data,data_test])
  122.  
  123. print(combine.describe())
  124.  
  125.  
  126. print(combine["Survived"].count())
  127. print(combine["Survived"].nunique()) # Number of unique values
  128. print(combine["Survived"].value_counts().idxmax()) #Most frequent value
  129.  
  130. #print(data["Survived"].value_counts()) #Most frequent value
  131.  
  132. print("\n")
  133.  
  134. print(combine["Pclass"].count())
  135. print(combine["Pclass"].nunique()) # Number of unique values
  136. print(combine["Pclass"].value_counts().idxmax()) #Most frequent value
  137.  
  138. print("\n")
  139.  
  140. print(combine["Sex"].count())
  141. print(combine["Sex"].nunique()) # Number of unique values
  142. print(combine["Sex"].value_counts().idxmax()) #Most frequent value
  143.  
  144. print("\n")
  145.  
  146. print(combine["Embarked"].count())
  147. print(combine["Embarked"].nunique()) # Number of unique values
  148. print(combine["Embarked"].value_counts().idxmax()) #Most frequent value
  149.  
  150.  
  151.  
  152. print(combine["Survived"].describe())
  153. print(combine["Embarked"].describe())
  154. print(combine["Pclass"].describe())
  155. print(combine["Sex"].describe())
Add Comment
Please, Sign In to add comment