Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _format_bin_edge(value):
    """Render a bin edge as text, dropping the '.0' of whole numbers."""
    value = float(value)
    return str(int(value)) if value.is_integer() else str(value)


def plot_by_group(yhat_all, val_Y_all, increment=50, metric='abs'):
    """Plot the percent error of predictions, grouped by true-value bin.

    The true values are bucketed into bins of width ``increment``; for each
    bin the median percent error is drawn with error bars spanning the
    interquartile range (25th to 75th percentile) of that bin's errors.
    The figure is displayed with ``plt.show()``.

    Args:
        yhat_all: Predicted values (any 1-D array-like of numbers).
        val_Y_all: True values, same length as ``yhat_all``. Errors are
            relative to these values, so zeros produce inf/nan errors
            (they are not filtered out).
        increment: Width of each bin on the true-value axis.
        metric: ``'abs'`` plots absolute percent error; any other value
            plots the signed (raw) percent error.

    Returns:
        None.
    """
    predicted = np.asarray(yhat_all, dtype=float).ravel()
    actual = np.asarray(val_Y_all, dtype=float).ravel()

    # Percent error relative to the true value.
    err = (predicted - actual) / actual * 100
    if metric == 'abs':
        err = np.abs(err)
        title = "Absolute Percent Errors by house sales group"
    else:
        title = "Raw Percent Errors by house sales group"

    # Each sample is keyed by the UPPER edge of its bin; a value sitting
    # exactly on a multiple of `increment` falls into the next bin up.
    true_bin = ((actual // increment) + 1) * increment

    # Place errors into their bins.
    err_bins = {}
    for pct_err, upper_edge in zip(err, true_bin):
        err_bins.setdefault(upper_edge, []).append(pct_err)

    keys = sorted(err_bins)
    err_list = [err_bins[key] for key in keys]

    # Quartiles per bin. The median (not the mean) is the plotted center so
    # that it always lies inside the IQR: a mean can fall outside it, which
    # would produce negative error-bar lengths that matplotlib rejects.
    quartiles = np.array(
        [np.percentile(group, [25, 50, 75]) for group in err_list]
    ).reshape(-1, 3)
    q25s, medians, q75s = quartiles.T

    _, ax = plt.subplots(figsize=(15, 5))
    ax.set(
        xlabel='true time to sell house',
        ylabel='IQR of percent error by group',
        title=title)

    # Asymmetric IQR bars: distance down to Q1 and up to Q3 from the median.
    ax.errorbar(
        keys,
        medians,
        yerr=[medians - q25s, q75s - medians],
        fmt='-o')

    # One tick per bin, labelled with the bin's range and its sample count.
    ax.set_xticks(keys)
    xticks = [
        '{} to {}\nsize group:{}'.format(
            _format_bin_edge(key - increment),
            _format_bin_edge(key),
            len(group))
        for key, group in zip(keys, err_list)
    ]
    ax.set_xticklabels(xticks)
    plt.show()
Add Comment
Please, Sign In to add comment