Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # 01 Calculate ECDF for Zoopla distribution model
def ecdf(data):
    """Compute the ECDF for a one-dimensional array of measurements.

    Parameters
    ----------
    data : array-like
        One-dimensional collection of measurements.

    Returns
    -------
    x : numpy.ndarray
        The measurements sorted in ascending order.
    y : numpy.ndarray
        Cumulative fractions ``1/n, 2/n, ..., 1`` paired element-wise
        with ``x``, where ``n`` is the number of measurements.
    """
    # x-data for the ECDF: the sorted measurements.
    x = np.sort(data)
    # Number of data points, taken from the sorted array so that x and y
    # are guaranteed to have the same length.
    n = len(x)
    # y-data for the ECDF: the i-th smallest value sits at fraction i / n.
    y = np.arange(1, n + 1) / n
    return x, y
- #02 Upload the dataset
- #Import modules and packages
- import numpy as np
- import pandas as pd
# Load the dataset from the CSV file.
ds = pd.read_csv('ecdf_data_1.csv')

# 03 Building up the data series for the ECDF
# Test series: the whole 'Price per bedroom' column as a NumPy ndarray.
ds_test = ds['Price per bedroom'].values

# Rows belonging to each area.
ds_il = ds[ds['AREA'] == 'Ilford']
ds_wd = ds[ds['AREA'] == 'West Drayton']

# Per-area subsets for rows with 1 to 5 bedrooms.
ds_il_1b, ds_il_2b, ds_il_3b, ds_il_4b, ds_il_5b = (
    ds_il[ds_il['BEDROOMS'] == beds] for beds in range(1, 6)
)
ds_wd_1b, ds_wd_2b, ds_wd_3b, ds_wd_4b, ds_wd_5b = (
    ds_wd[ds_wd['BEDROOMS'] == beds] for beds in range(1, 6)
)
# Compute the ECDF x/y pairs for every series that gets plotted.
x_test, y_test = ecdf(ds_test)

# Whole-area series.
(x_wd, y_wd), (x_il, y_il) = (
    ecdf(frame['Price per bedroom'].values) for frame in (ds_wd, ds_il)
)

# Ilford, split by bedroom count.
(
    (x_il_1b, y_il_1b),
    (x_il_2b, y_il_2b),
    (x_il_3b, y_il_3b),
    (x_il_4b, y_il_4b),
    (x_il_5b, y_il_5b),
) = (
    ecdf(frame['Price per bedroom'].values)
    for frame in (ds_il_1b, ds_il_2b, ds_il_3b, ds_il_4b, ds_il_5b)
)

# West Drayton, split by bedroom count.
(
    (x_wd_1b, y_wd_1b),
    (x_wd_2b, y_wd_2b),
    (x_wd_3b, y_wd_3b),
    (x_wd_4b, y_wd_4b),
    (x_wd_5b, y_wd_5b),
) = (
    ecdf(frame['Price per bedroom'].values)
    for frame in (ds_wd_1b, ds_wd_2b, ds_wd_3b, ds_wd_4b, ds_wd_5b)
)
- #04 Plotting the ECDF
- # Import matplotlib module for plotting
- import matplotlib.pyplot as plt
- import seaborn as sns
- import matplotlib.patches as mpatches
- %matplotlib inline
# Generate plot: ECDF of price per bedroom, West Drayton vs Ilford
sns.set_style('whitegrid')
# Set plot size before the first drawing call; figure.figsize only applies
# to figures created after it is set.
plt.rcParams['figure.figsize'] = (11, 7)
# West Drayton is drawn in red (#c00000), Ilford in blue (#1f4e79).
_ = plt.plot(x_wd, y_wd, marker='.', linestyle='none', color='#c00000', alpha=0.5)
_ = plt.plot(x_il, y_il, marker='.', linestyle='none', color='#1f4e79', alpha=0.5)
# Make the margins nice
_ = plt.margins(0.02)
# Label the axes
_ = plt.xlabel('Price per bedroom (£)', fontsize=16, family='Arial')
_ = plt.ylabel('ECDF', fontsize=16)
plt.tick_params(labelsize=14)
# Set axis style. Visibility is passed positionally because the `b`
# keyword was renamed to `visible` in Matplotlib 3.5 and later removed.
_ = plt.grid(True, which='major', color='#cccccc', linestyle='--')
# Add legend; each label matches the colour its series is plotted in.
red_patch = mpatches.Patch(color='#c00000', label='West Drayton')
blue_patch = mpatches.Patch(color='#1f4e79', label='Ilford')
_ = plt.legend(handles=[red_patch, blue_patch], loc='upper left', fontsize=16)
# Display the plot
plt.show()
- # The next plot is below
# Generate plot: ECDF of price per bedroom in Ilford, one series per
# bedroom count
sns.set_style('whitegrid')
# Set plot size before the first drawing call; figure.figsize only applies
# to figures created after it is set.
plt.rcParams['figure.figsize'] = (11, 7)
_ = plt.plot(x_il_1b, y_il_1b, marker='.', linestyle='none', color='#c00000', alpha=1, markersize=12)
_ = plt.plot(x_il_2b, y_il_2b, marker='.', linestyle='none', color='#1f4e79', alpha=1, markersize=12)
_ = plt.plot(x_il_3b, y_il_3b, marker='.', linestyle='none', color='#6e1a18', alpha=1, markersize=12)
_ = plt.plot(x_il_4b, y_il_4b, marker='.', linestyle='none', color='#808080', alpha=1, markersize=12)
_ = plt.plot(x_il_5b, y_il_5b, marker='.', linestyle='none', color='#ff8080', alpha=1, markersize=12)
# Make the margins nice
_ = plt.margins(0.02)
# Label the axes
_ = plt.xlabel('Price per bedroom (£)', fontsize=16, family='Arial')
_ = plt.ylabel('ECDF', fontsize=16)
plt.tick_params(labelsize=14)
# Set axis style. Visibility is passed positionally because the `b`
# keyword was renamed to `visible` in Matplotlib 3.5 and later removed.
_ = plt.grid(True, which='major', color='#cccccc', linestyle='--')
# Add legend; patch colours mirror the plot calls above.
patch_01 = mpatches.Patch(color='#c00000', label='1 bedroom properties')
patch_02 = mpatches.Patch(color='#1f4e79', label='2 bedroom properties')
patch_03 = mpatches.Patch(color='#6e1a18', label='3 bedroom properties')
patch_04 = mpatches.Patch(color='#808080', label='4 bedroom properties')
patch_05 = mpatches.Patch(color='#ff8080', label='5 bedroom properties')
_ = plt.legend(handles=[patch_01, patch_02, patch_03, patch_04, patch_05], loc='lower right', fontsize=16)
# Display the plot
plt.show()
Add Comment
Please, Sign In to add comment