daily pastebin goal
59%
SHARE
TWEET

Untitled

a guest Jun 14th, 2018 47 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # 01 Calculate ECDF for Zoopla distribution model
  2. def ecdf(data):
  3.     """Compute ECDF for a one-dimensional array of measurements."""
  4.  
  5.     # Number of data points: n
  6.     n = len(data)
  7.  
  8.     # x-data for the ECDF: x
  9.     x = np.sort(data)
  10.  
  11.     # y-data for the ECDF: y
  12.     y = np.arange(1, len(x)+1) / n
  13.  
  14.     return x, y
  15.    
  16. #02 Upload the dataset
  17. #Import modules and packages
  18. import numpy as np
  19. import pandas as pd
  20. ds = pd.read_csv('ecdf_data_1.csv')
  21.  
  22. #03 Building-up data series for ECDF
  23. ## Construct test series of data called ds_test
  24. # Convert Pandas Series to Numpy.ndarray
  25. ds_test = ds['Price per bedroom'].values
  26.  
  27. ds_il = ds[(ds['AREA'] == 'Ilford')]
  28. ds_wd = ds[(ds['AREA'] == 'West Drayton')]
  29.  
  30. ds_il_1b = ds_il[(ds_il['BEDROOMS'] == 1)]
  31. ds_il_2b = ds_il[(ds_il['BEDROOMS'] == 2)]
  32. ds_il_3b = ds_il[(ds_il['BEDROOMS'] == 3)]
  33. ds_il_4b = ds_il[(ds_il['BEDROOMS'] == 4)]
  34. ds_il_5b = ds_il[(ds_il['BEDROOMS'] == 5)]
  35.  
  36. ds_wd_1b = ds_wd[(ds_wd['BEDROOMS'] == 1)]
  37. ds_wd_2b = ds_wd[(ds_wd['BEDROOMS'] == 2)]
  38. ds_wd_3b = ds_wd[(ds_wd['BEDROOMS'] == 3)]
  39. ds_wd_4b = ds_wd[(ds_wd['BEDROOMS'] == 4)]
  40. ds_wd_5b = ds_wd[(ds_wd['BEDROOMS'] == 5)]
  41.  
  42. ## Get values for x and y axis.
  43. x_test, y_test = ecdf(ds_test)
  44. x_wd, y_wd = ecdf(ds_wd['Price per bedroom'].values)
  45. x_il, y_il = ecdf(ds_il['Price per bedroom'].values)
  46.  
  47. x_il_1b, y_il_1b = ecdf(ds_il_1b['Price per bedroom'].values)
  48. x_il_2b, y_il_2b = ecdf(ds_il_2b['Price per bedroom'].values)
  49. x_il_3b, y_il_3b = ecdf(ds_il_3b['Price per bedroom'].values)
  50. x_il_4b, y_il_4b = ecdf(ds_il_4b['Price per bedroom'].values)
  51. x_il_5b, y_il_5b = ecdf(ds_il_5b['Price per bedroom'].values)
  52.  
  53. x_wd_1b, y_wd_1b = ecdf(ds_wd_1b['Price per bedroom'].values)
  54. x_wd_2b, y_wd_2b = ecdf(ds_wd_2b['Price per bedroom'].values)
  55. x_wd_3b, y_wd_3b = ecdf(ds_wd_3b['Price per bedroom'].values)
  56. x_wd_4b, y_wd_4b = ecdf(ds_wd_4b['Price per bedroom'].values)
  57. x_wd_5b, y_wd_5b = ecdf(ds_wd_5b['Price per bedroom'].values)
  58.  
  59. #04 Plotting the ECDF
  60. # Import matplotlib module for plotting
  61. import matplotlib.pyplot as plt
  62. import seaborn as sns
  63. import matplotlib.patches as mpatches
  64.  
  65. %matplotlib inline
  66.  
  67. # Generate plot
  68. sns.set_style('whitegrid')
  69.  
  70. _ = plt.plot(x_wd, y_wd, marker='.', linestyle='none', color = '#c00000', alpha = 0.5)
  71. _ = plt.plot(x_il, y_il, marker='.', linestyle='none', color = '#1f4e79', alpha = 0.5)
  72.  
  73. # Make the margins nice
  74. _ = plt.margins(0.02)
  75.  
  76. # Label the axes
  77. _ = plt.xlabel('Price per bedroom (£)', fontsize = 16, family='Arial')
  78. _ = plt.ylabel('ECDF', fontsize = 16)
  79.  
  80. plt.tick_params(labelsize=14)
  81.  
  82. # Set plot size
  83. plt.rcParams['figure.figsize'] = (11,7)
  84.  
  85. # Set axis style
  86. _ = plt.grid(b=True, which='major', color='#cccccc', linestyle='--')
  87.  
  88. # Add legend
  89. red_patch = mpatches.Patch(color='#c00000', label='Ilford')
  90. blue_patch = mpatches.Patch(color='#1f4e79', label='West Drayton')
  91. _ = plt.legend(handles=[red_patch, blue_patch], loc = 'upper left', fontsize = 16)
  92.  
  93. # Display the plot
  94. plt.show()
  95.  
  96. # Next one plot is below
  97. # Generate plot
  98. sns.set_style('whitegrid')
  99.  
  100. _ = plt.plot(x_il_1b, y_il_1b, marker='.', linestyle='none', color = '#c00000', alpha = 1, markersize=12)
  101. _ = plt.plot(x_il_2b, y_il_2b, marker='.', linestyle='none', color = '#1f4e79', alpha = 1, markersize=12)
  102. _ = plt.plot(x_il_3b, y_il_3b, marker='.', linestyle='none', color = '#6e1a18', alpha = 1, markersize=12)
  103. _ = plt.plot(x_il_4b, y_il_4b, marker='.', linestyle='none', color = '#808080', alpha = 1, markersize=12)
  104. _ = plt.plot(x_il_5b, y_il_5b, marker='.', linestyle='none', color = '#ff8080', alpha = 1, markersize=12)
  105.  
  106. # Make the margins nice
  107. _ = plt.margins(0.02)
  108.  
  109. # Label the axes
  110. _ = plt.xlabel('Price per bedroom (£)', fontsize = 16, family='Arial')
  111. _ = plt.ylabel('ECDF', fontsize = 16)
  112.  
  113. plt.tick_params(labelsize=14)
  114.  
  115. # Set plot size
  116. plt.rcParams['figure.figsize'] = (11,7)
  117.  
  118. # Set axis style
  119. _ = plt.grid(b=True, which='major', color='#cccccc', linestyle='--')
  120.  
  121. # Add legend
  122. patch_01 = mpatches.Patch(color='#c00000', label='1 bedroom properties')
  123. patch_02 = mpatches.Patch(color='#1f4e79', label='2 bedroom properties')
  124. patch_03 = mpatches.Patch(color='#6e1a18', label='3 bedroom properties')
  125. patch_04 = mpatches.Patch(color='#808080', label='4 bedroom properties')
  126. patch_05 = mpatches.Patch(color='#ff8080', label='5 bedroom properties')
  127. _ = plt.legend(handles=[patch_01, patch_02, patch_03, patch_04, patch_05], loc = 'lower right', fontsize = 16)
  128.  
  129. # Display the plot
  130. plt.show()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top