View difference between Paste ID: <a href="/AkYX7zhg">AkYX7zhg</a> and <a href="/RHXJY47M">RHXJY47M</a>

# Calculate stats for Wheel of Time series
1	-	# Calcualte stats for Wheel of Time series
1	+	#!/usr/bin/env python
2	-	# Each row is the number of 5,4,3,2,1 rations for a book in the series
2	+	# -- coding: utf-8 --
3	-	# Collected from goodreads.com, amazon.com and amazon.co.uk
3	+	"""
4		Calculate stats for Wheel of Time series
5		Each row is the number of 5,4,3,2,1 ratings for a book in the series
6		Collected from goodreads.com, amazon.com and amazon.co.uk
7		"""
8
# Rating histograms collected from goodreads.com.
# One row per book (row comment = book number); the columns hold the number
# of 5, 4, 3, 2 and 1 star ratings, in that order.
# A row of -1 values marks a book for which no data is available; its
# negative total makes histo_stat() report "no data".
goodreads = [
    [45132, 32768, 15394, 4462, 2287],  # 1
    [-1, -1, -1, -1, -1],  # 2
    [31992, 26753, 11780, 2265, 795],  # 3
    [24523, 21922, 10282, 1747, 430],  # 4
    [16119, 16903, 9131, 1749, 371],  # 5
    [12290, 12332, 7866, 1750, 399],  # 6
    [9925, 11312, 8258, 2068, 451],  # 7
    [8841, 9805, 8043, 2511, 542],  # 8
    [8891, 9205, 7464, 2438, 622],  # 9
    [7411, 7549, 6040, 2272, 864],  # 10
    [8959, 8456, 4699, 1202, 321],  # 11
    [14569, 9046, 3169, 676, 363],  # 12
    [14147, 7671, 2337, 451, 292],  # 13
]
23
# Rating histograms collected from amazon.com.
# One row per book (row comment = book number); the columns hold the number
# of 5, 4, 3, 2 and 1 star ratings, in that order.
amazoncom = [
    [1279, 404, 188, 138, 157],  # 1
    [393, 139, 37, 13, 15],  # 2
    [292, 122, 35, 23, 17],  # 3
    [275, 93, 39, 17, 19],  # 4
    [204, 97, 54, 27, 24],  # 5
    [227, 83, 55, 35, 35],  # 6
    [335, 187, 116, 73, 54],  # 7
    [361, 330, 379, 352, 466],  # 8
    [331, 302, 191, 184, 185],  # 9
    [199, 146, 253, 396, 1581],  # 10
    [184, 155, 108, 81, 111],  # 11
    [540, 149, 31, 22, 14],  # 12
    [1628, 346, 150, 62, 358],  # 13
]
38
# Rating histograms collected from amazon.co.uk.
# One row per book (row comment = book number); the columns hold the number
# of 5, 4, 3, 2 and 1 star ratings, in that order.
amazoncouk = [
    [144, 47, 20, 19, 20],  # 1
    [33, 25, 4, 0, 1],  # 2
    [46, 25, 8, 0, 5],  # 3
    [32, 12, 7, 0, 1],  # 4
    [21, 17, 5, 5, 4],  # 5
    [25, 10, 10, 1, 2],  # 6
    [19, 12, 2, 0, 0],  # 7
    [20, 21, 22, 16, 19],  # 8
    [44, 33, 26, 9, 10],  # 9
    [20, 18, 32, 60, 76],  # 10
    [38, 42, 17, 6, 5],  # 11
    [125, 22, 6, 3, 1],  # 12
    [96, 31, 8, 8, 5],  # 13
]
53
54		import math
55		import numpy as np
56		from matplotlib import pyplot as plot
57
58
def histo_stat(haxis, histo):
    """
    Compute the weighted mean, the weighted standard deviation and the
    normalised bin contents of a histogram.

    Parameters: haxis - x axis (one value per bin),
                histo - weight for each entry in haxis

    Returns: (mean, std, frac) where frac holds, per bin, the share of the
    total weight (the entries sum to 1).
    If the total weight is not positive (no data, e.g. a row of -1
    placeholders or an all-zero row) the result is
    (0, 0, [0] * len(haxis)).
    """
    # The weights are walked several times, so materialise them once. Pairing
    # them with the axis anew for every pass also keeps the result correct on
    # Python 3, where a single zip() object would be exhausted after the mean.
    weights = list(histo)
    sumw = sum(weights)
    if sumw <= 0:
        # "<= 0" rather than "< 0": a zero total would otherwise divide by zero
        return 0, 0, [0] * len(haxis)

    norm = 1. / sumw
    mean = norm * sum(w * x for w, x in zip(weights, haxis))
    variance = norm * sum(w * (x - mean) ** 2 for w, x in zip(weights, haxis))
    std = math.sqrt(variance)
    frac = [float(w) / sumw for w in weights]
    return mean, std, frac
72
73
def run(name, data):
    """
    Compute and print the rating statistics of every book in one dataset.

    Parameters: name - dataset name, data - table with histograms
                (one row per book: number of 5, 4, 3, 2, 1 star ratings)

    Returns: (stats, percs) as numpy arrays. stats has one [mean, std] row
    per book; percs has one row per book with the percentage of ratings
    for each score, in the column order of data.
    """
    # Kept from the original body: makes figure 1 the current figure.
    plot.figure(1)

    # Score that belongs to each histogram column.
    haxis = [5, 4, 3, 2, 1]

    # Label the table that follows so the datasets can be told apart.
    print(name)

    stats = []
    percs = []
    for i, histo in enumerate(data):
        mean, std, frac = histo_stat(haxis, histo)
        if mean == 0:  # Special case with no data
            # hard-coded placeholder mean used for a book without ratings
            mean = 4.15
        stats.append([mean, std])

        row = [100 * fi for fi in frac]
        percs.append(row)

        perc = ", ".join("{:.0f}".format(p) for p in row)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("{:>2}: {:.2f} +- {:.2f} [{}]".format(i + 1, mean, std, perc))
    return np.array(stats), np.array(percs)
92	-	run("Amazon.co.uk", amazoncouk, "blue")
92	+
93	-	run("Goodreads.com", goodreads, "green")
93	+
def plot_stats(stats, color):
    """
    Plot statistics, can superimpose

    Parameters: stats - 2D array, column 0 is drawn as the value and
                column 1 as its symmetric error, one row per book,
                color - colour used for the error bars and the band

    Draws into subplot (1, 1, 1) of the current figure; nothing is returned.
    """
    centre = stats[:, 0]
    spread = stats[:, 1]
    xaxis = range(1, stats.shape[0] + 1)  # books are numbered from 1

    plot.subplot(1, 1, 1, xlim=(1, 13))
    plot.errorbar(
        xaxis, centre, xerr=0.5, yerr=spread, color=color, fmt='-o')
    # translucent band covering centre +- spread
    plot.fill_between(
        xaxis, centre - spread, centre + spread, facecolor=color, alpha=0.3)
    plot.ylabel("score")
    plot.xlabel("book")
107
108
def plot_percs(isub, percs, name):
    """
    Plot percentages as an image into a given subplot

    Parameters: isub - index of the subplot in a 3 rows x 1 column grid,
                percs - 2D array of percentages, one row per book,
                name - title of the subplot

    Returns: the image object created by imshow (usable for a colorbar).
    """
    plot.subplot(3, 1, isub)
    # Transpose so that books run along x and the score bins along y.
    grid = percs.T
    im = plot.imshow(
        grid, extent=(1, grid.shape[1], 1, grid.shape[0]),
        interpolation="nearest", clim=(0, 100))
    plot.title(name)
    plot.ylabel("score")
    plot.xlabel("book")
    return im
121
# Compute (and print) the statistics of the three datasets.
stats1, percs1 = run("Amazon.com", amazoncom)
stats2, percs2 = run("Amazon.co.uk", amazoncouk)
stats3, percs3 = run("Goodreads.com", goodreads)

# Figure 1: the three datasets superimposed, one colour each.
fig1 = plot.figure(1)
plot_stats(stats1, "red")
plot_stats(stats2, "blue")
plot_stats(stats3, "green")

# Figure 2: one percentage image per dataset, stacked vertically.
fig2 = plot.figure(2)
plot_percs(1, percs1, "Amazon.com")
plot_percs(2, percs2, "Amazon.co.uk")
im = plot_percs(3, percs3, "Goodreads.com")
# Colorbar in its own, manually placed axes; the image returned by the last
# plot_percs() call supplies the colour scale (all three use clim=(0, 100)).
cax = fig2.add_axes([0.8, 0.1, 0.03, 0.85])
fig2.colorbar(im, cax=cax, label="Percentage")
# NOTE(review): tight_layout() runs after the manual add_axes() call and may
# not take the colorbar axes into account - confirm the layout visually.
fig2.tight_layout()
plot.show()