Advertisement
tuomasvaltanen

Untitled

Mar 16th, 2021 (edited)
939
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.16 KB | None | 0 0
  1. # lecture 4
  2.  
  3. import numpy as np
  4. import pandas as pd
  5.  
  6. # helper function to determine if one column has
  7. # even or odd numbers (True / False)
  8. def even_number(row):
  9.     if row['Y'] % 2 == 0:
  10.         return True
  11.     else:
  12.         return False
  13.  
  14.  
  15. # lambda version of previous function
  16. even_number_lambda = lambda x : x['Y'] % 2 == 0
  17.  
  18.  
  19. # random data 4x3 matrix, values between 1-10
  20. df = pd.DataFrame(np.random.randint(1, 10, 12).reshape(4,3),
  21.                   ['A', 'B', 'C', 'D'], ['X','Y','Z'])
  22.  
  23. # use our own function to determine even numbers
  24. # remember, axis = 1 => use columns instead of rows
  25. df['Even'] = df.apply(even_number_lambda, axis=1)
  26.  
  27.  
  28. # NEW FILE - HOUSING DATA
  29.  
  30. import numpy as np
  31. import pandas as pd
  32.  
  33. from scipy import stats
  34.  
  35. # force normal numbers instead of scientific notation
  36. pd.set_option('display.float_format', lambda x: '%.1f' % x)
  37.  
  38.  
  39. def age_group(row):
  40.     # yr_built
  41.     if row['yr_built'] >= 2000:
  42.         return 4
  43.     elif 1980 <= row['yr_built'] < 2000:
  44.         return 3
  45.     elif 1960 <= row['yr_built'] < 1980:
  46.         return 2
  47.     else:
  48.         return 1
  49.  
  50. houses = pd.read_csv('houses.csv')
  51.  
  52. print(houses.head())
  53.  
  54. # this should be the most expensive house
  55. single = houses[houses['id'] == 6762700020]
  56.  
  57. # getting the price of the single selected property
  58. price = single.iloc[0]['price']
  59. print(price)
  60.  
  61. # getting the ids of cheapest and most expensive properties
  62. cheapest_id = houses.loc[houses['price'].idxmin()]['id']
  63. expensive_id = houses.loc[houses['price'].idxmax()]['id']
  64.  
  65. print(houses['bedrooms'].max())
  66.  
  67. expensive_houses = houses[houses['price'] >= 2000000].count()
  68.  
  69. average_price_per_condition = houses.groupby('condition').mean()
  70.  
  71. # axis = 1 => drop a column, this time: id
  72. houses = houses.drop('id', axis=1)
  73. houses = houses.drop('date', axis=1)
  74. houses = houses.drop('zipcode', axis=1)
  75. houses = houses.drop('lat', axis=1)
  76. houses = houses.drop('long', axis=1)
  77. houses = houses.drop('yr_renovated', axis=1)
  78. houses = houses.drop('waterfront', axis=1)
  79. houses = houses.drop('view', axis=1)
  80. houses = houses.drop('sqft_living', axis=1)
  81. houses = houses.drop('sqft_lot', axis=1)
  82. houses = houses.drop('sqft_above', axis=1)
  83. houses = houses.drop('sqft_basement', axis=1)
  84.  
  85. houses['living_m2'] = round(houses['sqft_living15'] * 0.09290304, 0)
  86. houses['yard_m2'] = round(houses['sqft_lot15'] * 0.09290304, 0)
  87.  
  88. houses = houses.drop('sqft_living15', axis=1)
  89. houses = houses.drop('sqft_lot15', axis=1)
  90.  
  91. houses['age_group'] = houses.apply(age_group, axis=1)
  92. houses = houses.drop('yr_built', axis=1)
  93.  
  94.  
  95. # use normal numbers instead of scientific notation
  96. houses['price'] = houses['price'].astype('int64')
  97.  
  98. houses = houses[(np.abs(stats.zscore(houses)) < 3).all(axis=1)]
  99.  
  100. summary = houses.describe()
  101. print(summary)
  102.  
  103. correlations = houses.corr()
  104.  
  105. # NEW FILE - INCOMPLETE TEST DATA
  106.  
  107. import numpy as np
  108. import pandas as pd
  109.  
  110. test = pd.read_csv('incomplete_test.csv')
  111.  
  112. # handle missing values
  113. # rows without a year are removed
  114. test = test[test['year'].notnull()]
  115.  
  116. # rows without a condition, use the average (mean)
  117. test['condition'].fillna(test['condition'].mean(), inplace=True)
  118.  
  119. unique_areas = test['area'].unique()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement