• API
• FAQ
• Tools
• Archive
daily pastebin goal
7%
SHARE
TWEET

# Untitled

a guest Jun 14th, 2018 48 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. # 01 Calculate ECDF for Zoopla distribution model
def ecdf(data):
    """Compute the ECDF for a one-dimensional array of measurements.

    Parameters
    ----------
    data : array-like
        One-dimensional collection of numeric observations.

    Returns
    -------
    x : numpy.ndarray
        The observations sorted in ascending order.
    y : numpy.ndarray
        Cumulative fractions ``1/n, 2/n, ..., 1`` paired element-wise with
        ``x``, always of float dtype (empty when ``data`` is empty).
    """
    # x-data for the ECDF: the sorted observations.
    x = np.sort(data)

    # Number of data points, computed once and reused below.
    n = len(x)

    # y-data for the ECDF: the i-th smallest value sits at fraction i/n.
    # Dividing by float(n) keeps this a true division on every interpreter;
    # a plain int / int would truncate each fraction below 1 to 0 on Python 2.
    y = np.arange(1, n + 1) / float(n)

    return x, y
15.
17. #Import modules and packages
18. import numpy as np
19. import pandas as pd
21.
#03 Building-up data series for ECDF
## Whole sample: the 'Price per bedroom' column as a NumPy ndarray (test series)
ds_test = ds['Price per bedroom'].values

## One sub-frame per area
ds_il = ds[ds['AREA'] == 'Ilford']
ds_wd = ds[ds['AREA'] == 'West Drayton']

## One sub-frame per area and bedroom count (1 to 5 bedrooms)
ds_il_1b, ds_il_2b, ds_il_3b, ds_il_4b, ds_il_5b = (
    ds_il[ds_il['BEDROOMS'] == beds] for beds in range(1, 6)
)
ds_wd_1b, ds_wd_2b, ds_wd_3b, ds_wd_4b, ds_wd_5b = (
    ds_wd[ds_wd['BEDROOMS'] == beds] for beds in range(1, 6)
)

## ECDF coordinates (x and y axis values) for the whole sample and each area
x_test, y_test = ecdf(ds_test)
(x_wd, y_wd), (x_il, y_il) = (
    ecdf(area['Price per bedroom'].values) for area in (ds_wd, ds_il)
)

## ECDF coordinates for each Ilford bedroom count
(
    (x_il_1b, y_il_1b),
    (x_il_2b, y_il_2b),
    (x_il_3b, y_il_3b),
    (x_il_4b, y_il_4b),
    (x_il_5b, y_il_5b),
) = (
    ecdf(subset['Price per bedroom'].values)
    for subset in (ds_il_1b, ds_il_2b, ds_il_3b, ds_il_4b, ds_il_5b)
)

## ECDF coordinates for each West Drayton bedroom count
(
    (x_wd_1b, y_wd_1b),
    (x_wd_2b, y_wd_2b),
    (x_wd_3b, y_wd_3b),
    (x_wd_4b, y_wd_4b),
    (x_wd_5b, y_wd_5b),
) = (
    ecdf(subset['Price per bedroom'].values)
    for subset in (ds_wd_1b, ds_wd_2b, ds_wd_3b, ds_wd_4b, ds_wd_5b)
)
58.
59. #04 Plotting the ECDF
60. # Import matplotlib module for plotting
61. import matplotlib.pyplot as plt
62. import seaborn as sns
63. import matplotlib.patches as mpatches
64.
65. %matplotlib inline
66.
# Generate plot: ECDF of price per bedroom, West Drayton vs Ilford
sns.set_style('whitegrid')

# Set plot size before the first plotting call. pyplot creates the figure on
# that call using the rcParams in force at the time, so assigning the size
# afterwards would leave this figure at the previous default.
plt.rcParams['figure.figsize'] = (11, 7)

# West Drayton is drawn in red (#c00000), Ilford in blue (#1f4e79).
_ = plt.plot(x_wd, y_wd, marker='.', linestyle='none',
             color='#c00000', alpha=0.5)
_ = plt.plot(x_il, y_il, marker='.', linestyle='none',
             color='#1f4e79', alpha=0.5)

# Make the margins nice
_ = plt.margins(0.02)

# Label the axes
_ = plt.xlabel('Price per bedroom (£)', fontsize=16, family='Arial')
_ = plt.ylabel('ECDF', fontsize=16)

plt.tick_params(labelsize=14)

# Set axis style. The on/off flag is passed positionally because its keyword
# name differs between Matplotlib releases (`b` in older ones, `visible` in
# newer ones); the positional form is accepted by both.
_ = plt.grid(True, which='major', color='#cccccc', linestyle='--')

# Legend. Each patch must carry the label of the series drawn in its colour
# above: red is West Drayton, blue is Ilford.
red_patch = mpatches.Patch(color='#c00000', label='West Drayton')
blue_patch = mpatches.Patch(color='#1f4e79', label='Ilford')
_ = plt.legend(handles=[red_patch, blue_patch], loc='upper left', fontsize=16)

# Display the plot
plt.show()
95.
# Next plot: ECDF of price per bedroom for Ilford, one series per bedroom count
# Generate plot
sns.set_style('whitegrid')

# Set plot size before the first plotting call. pyplot creates the figure on
# that call using the rcParams in force at the time, so assigning the size
# afterwards would not affect this figure.
plt.rcParams['figure.figsize'] = (11, 7)

# One colour per bedroom count, 1 to 5 bedrooms in order.
_ = plt.plot(x_il_1b, y_il_1b, marker='.', linestyle='none',
             color='#c00000', alpha=1, markersize=12)
_ = plt.plot(x_il_2b, y_il_2b, marker='.', linestyle='none',
             color='#1f4e79', alpha=1, markersize=12)
_ = plt.plot(x_il_3b, y_il_3b, marker='.', linestyle='none',
             color='#6e1a18', alpha=1, markersize=12)
_ = plt.plot(x_il_4b, y_il_4b, marker='.', linestyle='none',
             color='#808080', alpha=1, markersize=12)
_ = plt.plot(x_il_5b, y_il_5b, marker='.', linestyle='none',
             color='#ff8080', alpha=1, markersize=12)

# Make the margins nice
_ = plt.margins(0.02)

# Label the axes
_ = plt.xlabel('Price per bedroom (£)', fontsize=16, family='Arial')
_ = plt.ylabel('ECDF', fontsize=16)

plt.tick_params(labelsize=14)

# Set axis style. The on/off flag is passed positionally because its keyword
# name differs between Matplotlib releases (`b` in older ones, `visible` in
# newer ones); the positional form is accepted by both.
_ = plt.grid(True, which='major', color='#cccccc', linestyle='--')
120.