SHOW:
|
|
- or go back to the newest paste.
| 1 | - | # Calculate stats for Wheel of Time series |
| 1 | + | #!/usr/bin/env python |
| 2 | - | # Each row is the number of 5,4,3,2,1 ratings for a book in the series |
| 2 | + | # -*- coding: utf-8 -*- |
| 3 | - | # Collected from goodreads.com, amazon.com and amazon.co.uk |
| 3 | + | """ |
| 4 | Calculate stats for Wheel of Time series | |
| 5 | Each row is the number of 5,4,3,2,1 ratings for a book in the series | |
| 6 | Collected from goodreads.com, amazon.com and amazon.co.uk | |
| 7 | """ | |
| 8 | ||
| 9 | goodreads = [ | |
| 10 | [45132, 32768, 15394, 4462, 2287], # 1 | |
| 11 | [-1, -1, -1, -1, -1], # 2 | |
| 12 | [31992, 26753, 11780, 2265, 795], # 3 | |
| 13 | [24523, 21922, 10282, 1747, 430], # 4 | |
| 14 | [16119, 16903, 9131, 1749, 371], # 5 | |
| 15 | [12290, 12332, 7866, 1750, 399], # 6 | |
| 16 | [9925, 11312, 8258, 2068, 451], # 7 | |
| 17 | [8841, 9805, 8043, 2511, 542], # 8 | |
| 18 | [8891, 9205, 7464, 2438, 622], # 9 | |
| 19 | [7411, 7549, 6040, 2272, 864], # 10 | |
| 20 | [8959, 8456, 4699, 1202, 321], # 11 | |
| 21 | [14569, 9046, 3169, 676, 363], # 12 | |
| 22 | [14147, 7671, 2337, 451, 292]] # 13 | |
| 23 | ||
| 24 | amazoncom = [ | |
| 25 | [1279, 404, 188, 138, 157], # 1 | |
| 26 | [393, 139, 37, 13, 15], # 2 | |
| 27 | [292, 122, 35, 23, 17], # 3 | |
| 28 | [275, 93, 39, 17, 19], # 4 | |
| 29 | [204, 97, 54, 27, 24], # 5 | |
| 30 | [227, 83, 55, 35, 35], # 6 | |
| 31 | [335, 187, 116, 73, 54], # 7 | |
| 32 | [361, 330, 379, 352, 466], # 8 | |
| 33 | [331, 302, 191, 184, 185], # 9 | |
| 34 | [199, 146, 253, 396, 1581], # 10 | |
| 35 | [184, 155, 108, 81, 111], # 11 | |
| 36 | [540, 149, 31, 22, 14], # 12 | |
| 37 | [1628, 346, 150, 62, 358]] # 13 | |
| 38 | ||
| 39 | amazoncouk = [ | |
| 40 | [144, 47, 20, 19, 20], # 1 | |
| 41 | [33, 25, 4, 0, 1], # 2 | |
| 42 | [46, 25, 8, 0, 5], # 3 | |
| 43 | [32, 12, 7, 0, 1], # 4 | |
| 44 | [21, 17, 5, 5, 4], # 5 | |
| 45 | [25, 10, 10, 1, 2], # 6 | |
| 46 | [19, 12, 2, 0, 0], # 7 | |
| 47 | [20, 21, 22, 16, 19], # 8 | |
| 48 | [44, 33, 26, 9, 10], # 9 | |
| 49 | [20, 18, 32, 60, 76], # 10 | |
| 50 | [38, 42, 17, 6, 5], # 11 | |
| 51 | [125, 22, 6, 3, 1], # 12 | |
| 52 | [96, 31, 8, 8, 5]] # 13 | |
| 53 | ||
| 54 | import math | |
| 55 | import numpy as np | |
| 56 | from matplotlib import pyplot as plot | |
| 57 | ||
| 58 | ||
| 59 | def histo_stat(haxis, histo): | |
| 60 | """ | |
| 61 | Parameters: haxis - x axis, histo - weight for each entry in axis | |
| 62 | - | return 0, 0, [] |
| 62 | + | |
| 63 | """ | |
| 64 | sumw = sum(h for h in histo) | |
| 65 | if sumw < 0: | |
| 66 | return 0, 0, [0] * len(haxis) | |
| 67 | ihisto = zip(histo, haxis) | |
| 68 | mean = 1. / sumw * sum(w * x for w, x in ihisto) | |
| 69 | std = math.sqrt(1. / sumw * sum(i[0] * (i[1] - mean) ** 2 for i in ihisto)) | |
| 70 | - | def run(name, data, color): |
| 70 | + | |
| 71 | return mean, std, frac | |
| 72 | ||
| 73 | ||
| 74 | def run(name, data): | |
| 75 | """ | |
| 76 | Parameters: name - dataset name, data - table with histograms | |
| 77 | """ | |
| 78 | fig = plot.figure(1) | |
| 79 | - | # Special case with no data |
| 79 | + | |
| 80 | - | if name == "Goodreads.com" and i == 1: |
| 80 | + | |
| 81 | stats = [] | |
| 82 | percs = [] | |
| 83 | for i, histo in enumerate(data): | |
| 84 | mean, std, frac = histo_stat(haxis, histo) | |
| 85 | - | stats = np.array(stats) |
| 85 | + | if mean == 0: # Special case with no data |
| 86 | mean = 4.15 | |
| 87 | - | xaxis = range(1, len(data)+1) |
| 87 | + | |
| 88 | - | plot.errorbar(xaxis, stats[:, 0], xerr=0.5, yerr=stats[:, 1], color=color, fmt='-o') |
| 88 | + | |
| 89 | - | plot.fill_between(xaxis, stats[:, 0] - stats[:, 1], stats[:, 0] + stats[:, 1], facecolor=color, alpha=0.3) |
| 89 | + | percs += [[100 * fi for fi in frac]] |
| 90 | print "{:>2}: {:.2f} +- {:.2f} [{}]".format(i + 1, mean, std, perc)
| |
| 91 | - | run("Amazon.com", amazoncom, "red")
|
| 91 | + | return np.array(stats), np.array(percs) |
| 92 | - | run("Amazon.co.uk", amazoncouk, "blue")
|
| 92 | + | |
| 93 | - | run("Goodreads.com", goodreads, "green")
|
| 93 | + | |
def plot_stats(stats, color):
    """
    Draw one dataset as an error-bar line with a shaded band.

    Parameters: stats - 2-D array with one row per book; column 0 is
    plotted as the line and column 1 is used as the vertical error and
    as the half-width of the shaded band. color - matplotlib color used
    for both the line and the band.

    Every call targets subplot(1, 1, 1) of the current figure, so it is
    meant to be called once per dataset to overlay them.
    NOTE(review): the columns are presumably (mean, std) as built by
    run(); the row construction is not visible here - confirm.
    """
    books = range(1, stats.shape[0] + 1)
    # The x range is fixed to books 1..13 regardless of the row count.
    plot.subplot(1, 1, 1, xlim=(1, 13))

    centre = stats[:, 0]
    spread = stats[:, 1]
    plot.errorbar(
        books, centre, xerr=0.5, yerr=spread, color=color, fmt='-o')
    plot.fill_between(
        books, centre - spread, centre + spread,
        facecolor=color, alpha=0.3)

    plot.ylabel("score")
    plot.xlabel("book")
| 107 | ||
| 108 | ||
def plot_percs(isub, percs, name):
    """
    Show a table of percentages as an image in one panel of a 3x1 grid.

    Parameters: isub - index of the panel passed to subplot(3, 1, isub),
    percs - 2-D array that is transposed before drawing, name - title
    of the panel.

    Returns the image object created by imshow, so that the caller can
    attach a colorbar to it.
    """
    plot.subplot(3, 1, isub)

    grid = percs.T
    nrows, ncols = grid.shape[0], grid.shape[1]
    # Colors are pinned to 0..100 so that all panels share one scale.
    image = plot.imshow(
        grid, extent=(1, ncols, 1, nrows),
        interpolation="nearest", clim=(0, 100))

    plot.title(name)
    plot.ylabel("score")
    plot.xlabel("book")
    return image
| 121 | ||
# Run the statistics for each source; run() is called for Amazon.com,
# Amazon.co.uk and Goodreads.com in that order and returns (stats, percs).
(stats1, percs1), (stats2, percs2), (stats3, percs3) = (
    run("Amazon.com", amazoncom),
    run("Amazon.co.uk", amazoncouk),
    run("Goodreads.com", goodreads),
)

# Figure 1: all three sources overlaid on the same axes.
fig1 = plot.figure(1)
for stats, color in ((stats1, "red"), (stats2, "blue"), (stats3, "green")):
    plot_stats(stats, color)

# Figure 2: one percentage image per source, stacked vertically.
fig2 = plot.figure(2)
panels = (
    (percs1, "Amazon.com"),
    (percs2, "Amazon.co.uk"),
    (percs3, "Goodreads.com"),
)
for isub, (percs, title) in enumerate(panels, 1):
    # After the loop, im refers to the last (Goodreads.com) image.
    im = plot_percs(isub, percs, title)

# Shared colorbar on its own axes, driven by the last image drawn.
cax = fig2.add_axes([0.8, 0.1, 0.03, 0.85])
fig2.colorbar(im, cax=cax, label="Percentage")
fig2.tight_layout()
plot.show()