Advertisement
Korotkodul

Titanic2

Apr 11th, 2025 (edited)
364
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.95 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import seaborn as sns
  4.  
  5. titanic_data = sns.load_dataset("titanic")
  6. print(titanic_data)
  7. print()
  8.  
  9. #Часть 1
  10. print("PART1\n")
  11. zero_cnt = np.sum(titanic_data.isnull(), axis = 0)
  12. print("zero_cnt:\n", zero_cnt)
  13. print()
  14.  
  15. #Часть 2
  16. print("\nPART2\n")
  17. #Удалите все столбцы, количество пропусков в которых превышает половину количества строк в таблице.
  18. half_rows = titanic_data.shape[0] // 2
  19. columns_to_drop = titanic_data.columns[zero_cnt > half_rows]
  20. titanic_data = titanic_data.drop(columns=columns_to_drop)
  21. print('columns_to_drop:', columns_to_drop, "- удаляем эти столбцы")
  22. print()
  23. #NB!Теперь работаем с titanic_cleaned
  24.  
  25. #После того, как вы удалите все столбцы, нарушающие описанное условие, удалите все строки, количество пропусков в которых превышает половину количества столбцов.
  26. row_zero_cnt = np.sum(titanic_data.isnull(), axis = 1)
  27. half_rows = titanic_data.shape[1] // 2
  28. """
  29. print("shape:", titanic_cleaned.shape)
  30. print(type(row_zero_cnt))
  31. print(row_zero_cnt)"""
  32. print("half_rows:",half_rows)
  33. print("Максимальное пропусков в строке:", row_zero_cnt.max())
  34. to_be_left = row_zero_cnt <= half_rows
  35. #print(type(to_be_left))
  36. #(to_be_left)
  37. titanic_data =  titanic_data[to_be_left]
  38. #print(titanic.shape)
  39. print("Вывод: мы не удалили ни одной строки")
  40. #NB!Теперь работаем с titanic
  41.  
  42. #Часть 3
  43. print("\nPART3\n")
  44. man_mask = (titanic_data['who'] == 'man')
  45. #print("man_mask\n",male_mask)
  46. man_age_median = np.median(titanic_data.loc[man_mask, 'age'].dropna()).round()
  47. #print("man_age_median:", man_age_median)
  48. #print()
  49. titanic_data.loc[(titanic_data['who'] == 'man') & (titanic_data['age'].isna()), 'age'] = man_age_median
  50.  
  51. woman_mask = (titanic_data['who'] == 'woman')
  52. woman_age_median = np.median(titanic_data.loc[woman_mask, 'age'].dropna()).round()
  53. #print("woman_age_median:", woman_age_median)
  54. #print()
  55. titanic_data.loc[(titanic_data['who'] == 'woman') & (titanic_data['age'].isna()), 'age'] = woman_age_median
  56.  
  57. child_mask = (titanic_data['who'] == 'child')
  58. child_age_median = np.median(titanic_data.loc[child_mask, 'age'].dropna()).round()
  59. #print("child_age_median:", child_age_median)
  60. #print()
  61. titanic_data.loc[(titanic_data['who'] == 'child') & (titanic_data['age'].isna()), 'age'] = child_age_median
  62.  
  63. #Часть4
  64. print("\nPART4\n")
  65. null_counts = np.sum(titanic_data.isna(), axis=1)
  66. #print("Распределение пропусков по строкам:")
  67. #print(null_counts.value_counts().sort_index())
  68. #print(null_counts)
  69. mask = null_counts <= 1
  70. #print("mask:", type(mask))
  71. titanic_data = titanic_data[mask]
  72.  
  73.  
  74. #Часть5
  75. print("\nPART5\n")
  76. city_cnt = titanic_data["embark_town"].value_counts()
  77. city = city_cnt.idxmax()
  78. print(city)
  79.  
  80. #Часть 6
  81. print("\nPART6\n")
  82. #percent_of_surv1 = (titanic_data["survived"].value_counts(normalize=True))[1]
  83. #print(round(percent_of_surv1, 2))
  84. surv_percent = np.round(titanic_data["survived"].value_counts(normalize=True)[1] * 100, 2)
  85. print(surv_percent)
  86.  
  87. #Часть7
  88. print("\nPART7\n")
  89. surv_cnt = titanic_data[titanic_data["survived"]==1].groupby("embarked")["survived"].count()
  90. print(surv_cnt)
  91.  
  92. #Часть 8
  93. print("\nPART8\n")
  94. surv_class_cnt = titanic_data[titanic_data["survived"]==1].groupby("class", observed=True)["survived"].count()
  95. print(surv_class_cnt)
  96.  
  97. #Часть 9
  98. print("\nPART9\n")
  99. surv_rich_percent = 100 * titanic_data.loc[titanic_data["fare"] >= 100, "survived"].value_counts(normalize=True)[1]
  100. print(np.round(surv_rich_percent, 2))
  101.  
  102. #Часть 10
  103. print("\nPART10\n")
  104. child_alone = titanic_data.loc[(titanic_data["who"] == "child") & (titanic_data["alone"] == True)]
  105. print(child_alone.shape)
  106. print(child_alone)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement