Guest User

Untitled

a guest
Jun 14th, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.29 KB | None | 0 0
  1. # 01 Calculate ECDF for Zoopla distribution model
  2. def ecdf(data):
  3. """Compute ECDF for a one-dimensional array of measurements."""
  4.  
  5. # Number of data points: n
  6. n = len(data)
  7.  
  8. # x-data for the ECDF: x
  9. x = np.sort(data)
  10.  
  11. # y-data for the ECDF: y
  12. y = np.arange(1, len(x)+1) / n
  13.  
  14. return x, y
  15.  
  16. #02 Upload the dataset
  17. #Import modules and packages
  18. import numpy as np
  19. import pandas as pd
  20. ds = pd.read_csv('ecdf_data_1.csv')
  21.  
  22. #03 Building-up data series for ECDF
  23. ## Construct test series of data called ds_test
  24. # Convert Pandas Series to Numpy.ndarray
  25. ds_test = ds['Price per bedroom'].values
  26.  
  27. ds_il = ds[(ds['AREA'] == 'Ilford')]
  28. ds_wd = ds[(ds['AREA'] == 'West Drayton')]
  29.  
  30. ds_il_1b = ds_il[(ds_il['BEDROOMS'] == 1)]
  31. ds_il_2b = ds_il[(ds_il['BEDROOMS'] == 2)]
  32. ds_il_3b = ds_il[(ds_il['BEDROOMS'] == 3)]
  33. ds_il_4b = ds_il[(ds_il['BEDROOMS'] == 4)]
  34. ds_il_5b = ds_il[(ds_il['BEDROOMS'] == 5)]
  35.  
  36. ds_wd_1b = ds_wd[(ds_wd['BEDROOMS'] == 1)]
  37. ds_wd_2b = ds_wd[(ds_wd['BEDROOMS'] == 2)]
  38. ds_wd_3b = ds_wd[(ds_wd['BEDROOMS'] == 3)]
  39. ds_wd_4b = ds_wd[(ds_wd['BEDROOMS'] == 4)]
  40. ds_wd_5b = ds_wd[(ds_wd['BEDROOMS'] == 5)]
  41.  
  42. ## Get values for x and y axis.
  43. x_test, y_test = ecdf(ds_test)
  44. x_wd, y_wd = ecdf(ds_wd['Price per bedroom'].values)
  45. x_il, y_il = ecdf(ds_il['Price per bedroom'].values)
  46.  
  47. x_il_1b, y_il_1b = ecdf(ds_il_1b['Price per bedroom'].values)
  48. x_il_2b, y_il_2b = ecdf(ds_il_2b['Price per bedroom'].values)
  49. x_il_3b, y_il_3b = ecdf(ds_il_3b['Price per bedroom'].values)
  50. x_il_4b, y_il_4b = ecdf(ds_il_4b['Price per bedroom'].values)
  51. x_il_5b, y_il_5b = ecdf(ds_il_5b['Price per bedroom'].values)
  52.  
  53. x_wd_1b, y_wd_1b = ecdf(ds_wd_1b['Price per bedroom'].values)
  54. x_wd_2b, y_wd_2b = ecdf(ds_wd_2b['Price per bedroom'].values)
  55. x_wd_3b, y_wd_3b = ecdf(ds_wd_3b['Price per bedroom'].values)
  56. x_wd_4b, y_wd_4b = ecdf(ds_wd_4b['Price per bedroom'].values)
  57. x_wd_5b, y_wd_5b = ecdf(ds_wd_5b['Price per bedroom'].values)
  58.  
  59. #04 Plotting the ECDF
  60. # Import matplotlib module for plotting
  61. import matplotlib.pyplot as plt
  62. import seaborn as sns
  63. import matplotlib.patches as mpatches
  64.  
  65. %matplotlib inline
  66.  
  67. # Generate plot
  68. sns.set_style('whitegrid')
  69.  
  70. _ = plt.plot(x_wd, y_wd, marker='.', linestyle='none', color = '#c00000', alpha = 0.5)
  71. _ = plt.plot(x_il, y_il, marker='.', linestyle='none', color = '#1f4e79', alpha = 0.5)
  72.  
  73. # Make the margins nice
  74. _ = plt.margins(0.02)
  75.  
  76. # Label the axes
  77. _ = plt.xlabel('Price per bedroom (£)', fontsize = 16, family='Arial')
  78. _ = plt.ylabel('ECDF', fontsize = 16)
  79.  
  80. plt.tick_params(labelsize=14)
  81.  
  82. # Set plot size
  83. plt.rcParams['figure.figsize'] = (11,7)
  84.  
  85. # Set axis style
  86. _ = plt.grid(b=True, which='major', color='#cccccc', linestyle='--')
  87.  
  88. # Add legend
  89. red_patch = mpatches.Patch(color='#c00000', label='Ilford')
  90. blue_patch = mpatches.Patch(color='#1f4e79', label='West Drayton')
  91. _ = plt.legend(handles=[red_patch, blue_patch], loc = 'upper left', fontsize = 16)
  92.  
  93. # Display the plot
  94. plt.show()
  95.  
  96. # Next one plot is below
  97. # Generate plot
  98. sns.set_style('whitegrid')
  99.  
  100. _ = plt.plot(x_il_1b, y_il_1b, marker='.', linestyle='none', color = '#c00000', alpha = 1, markersize=12)
  101. _ = plt.plot(x_il_2b, y_il_2b, marker='.', linestyle='none', color = '#1f4e79', alpha = 1, markersize=12)
  102. _ = plt.plot(x_il_3b, y_il_3b, marker='.', linestyle='none', color = '#6e1a18', alpha = 1, markersize=12)
  103. _ = plt.plot(x_il_4b, y_il_4b, marker='.', linestyle='none', color = '#808080', alpha = 1, markersize=12)
  104. _ = plt.plot(x_il_5b, y_il_5b, marker='.', linestyle='none', color = '#ff8080', alpha = 1, markersize=12)
  105.  
  106. # Make the margins nice
  107. _ = plt.margins(0.02)
  108.  
  109. # Label the axes
  110. _ = plt.xlabel('Price per bedroom (£)', fontsize = 16, family='Arial')
  111. _ = plt.ylabel('ECDF', fontsize = 16)
  112.  
  113. plt.tick_params(labelsize=14)
  114.  
  115. # Set plot size
  116. plt.rcParams['figure.figsize'] = (11,7)
  117.  
  118. # Set axis style
  119. _ = plt.grid(b=True, which='major', color='#cccccc', linestyle='--')
  120.  
  121. # Add legend
  122. patch_01 = mpatches.Patch(color='#c00000', label='1 bedroom properties')
  123. patch_02 = mpatches.Patch(color='#1f4e79', label='2 bedroom properties')
  124. patch_03 = mpatches.Patch(color='#6e1a18', label='3 bedroom properties')
  125. patch_04 = mpatches.Patch(color='#808080', label='4 bedroom properties')
  126. patch_05 = mpatches.Patch(color='#ff8080', label='5 bedroom properties')
  127. _ = plt.legend(handles=[patch_01, patch_02, patch_03, patch_04, patch_05], loc = 'lower right', fontsize = 16)
  128.  
  129. # Display the plot
  130. plt.show()
Add Comment
Please, Sign In to add comment