View difference between Paste ID: AkYX7zhg and RHXJY47M
SHOW: | | - or go back to the newest paste.
1-
# Calcualte stats for Wheel of Time series
1+
#!/usr/bin/env python
2-
# Each row is the number of 5,4,3,2,1 rations for a book in the series
2+
# -*- coding: utf-8 -*-
3-
# Collected from goodreads.com, amazon.com and amazon.co.uk
3+
"""
4
Calculate stats for the Wheel of Time series
5
Each row is the number of 5,4,3,2,1 ratings for a book in the series
6
Collected from goodreads.com, amazon.com and amazon.co.uk
7
"""
8
9
# Rating counts collected from goodreads.com.
# One row per book; the columns are the number of 5, 4, 3, 2 and 1 ratings.
# Book 2 has no data and is filled with -1 in every column.
goodreads = [
    [45132, 32768, 15394, 4462, 2287],  # 1
    [-1, -1, -1, -1, -1],  # 2
    [31992, 26753, 11780, 2265, 795],  # 3
    [24523, 21922, 10282, 1747, 430],  # 4
    [16119, 16903, 9131, 1749, 371],  # 5
    [12290, 12332, 7866, 1750, 399],  # 6
    [9925, 11312, 8258, 2068, 451],  # 7
    [8841, 9805, 8043, 2511, 542],  # 8
    [8891, 9205, 7464, 2438, 622],  # 9
    [7411, 7549, 6040, 2272, 864],  # 10
    [8959, 8456, 4699, 1202, 321],  # 11
    [14569, 9046, 3169, 676, 363],  # 12
    [14147, 7671, 2337, 451, 292],  # 13
]
23
24
# Rating counts collected from amazon.com.
# One row per book; the columns are the number of 5, 4, 3, 2 and 1 ratings.
amazoncom = [
    [1279, 404, 188, 138, 157],  # 1
    [393, 139, 37, 13, 15],  # 2
    [292, 122, 35, 23, 17],  # 3
    [275, 93, 39, 17, 19],  # 4
    [204, 97, 54, 27, 24],  # 5
    [227, 83, 55, 35, 35],  # 6
    [335, 187, 116, 73, 54],  # 7
    [361, 330, 379, 352, 466],  # 8
    [331, 302, 191, 184, 185],  # 9
    [199, 146, 253, 396, 1581],  # 10
    [184, 155, 108, 81, 111],  # 11
    [540, 149, 31, 22, 14],  # 12
    [1628, 346, 150, 62, 358],  # 13
]
38
39
# Rating counts collected from amazon.co.uk.
# One row per book; the columns are the number of 5, 4, 3, 2 and 1 ratings.
amazoncouk = [
    [144, 47, 20, 19, 20],  # 1
    [33, 25, 4, 0, 1],  # 2
    [46, 25, 8, 0, 5],  # 3
    [32, 12, 7, 0, 1],  # 4
    [21, 17, 5, 5, 4],  # 5
    [25, 10, 10, 1, 2],  # 6
    [19, 12, 2, 0, 0],  # 7
    [20, 21, 22, 16, 19],  # 8
    [44, 33, 26, 9, 10],  # 9
    [20, 18, 32, 60, 76],  # 10
    [38, 42, 17, 6, 5],  # 11
    [125, 22, 6, 3, 1],  # 12
    [96, 31, 8, 8, 5],  # 13
]
53
54
import math
55
import numpy as np
56
from matplotlib import pyplot as plot
57
58
59
def histo_stat(haxis, histo):
    """
    Compute weighted statistics of a histogram.

    Parameters: haxis - x axis, histo - weight for each entry in axis

    Returns a tuple (mean, std, frac): the weighted mean of haxis, the
    weighted standard deviation around that mean, and a list holding each
    weight as a fraction of the total weight.  When the total weight is not
    positive there is nothing to normalise by, and (0, 0, [0] * len(haxis))
    is returned instead.
    """
    sumw = sum(histo)
    # A non-positive total means there is no usable data; a total of exactly
    # zero would otherwise divide by zero in the normalisation below.
    if sumw <= 0:
        return 0, 0, [0] * len(haxis)

    # The (weight, x) pairs are walked twice, so they are stored in a list;
    # a bare zip() is a one-shot iterator on Python 3 and would leave the
    # second pass empty.
    pairs = list(zip(histo, haxis))
    norm = float(sumw)  # force true division on Python 2 as well

    mean = sum(w * x for w, x in pairs) / norm
    std = math.sqrt(sum(w * (x - mean) ** 2 for w, x in pairs) / norm)

    # NOTE(review): the statement that builds frac is missing from the text
    # this was taken from.  Normalised weights match the all-zero fallback
    # above - confirm against the complete script.
    frac = [w / norm for w in histo]
    return mean, std, frac
72
73
74
def run(name, data):
75
    """
76
    Parameters: name - dataset name, data - table with histograms

    Calls histo_stat on every row of data, prints one line per row and
    returns two numpy arrays built from the stats and percs lists.
77
    """
    # NOTE(review): this text is a rendered diff, not a plain script.  Bare
    # numbers are line markers, "N-" precedes a line of the older paste and
    # "N+" precedes a line of the newer one.
    # NOTE(review): the definitions of haxis and perc, and the statement that
    # fills stats, are not visible in this rendering - confirm against the
    # complete paste before relying on this function.
78
    fig = plot.figure(1)
79-
        # Special case with no data
79+
80-
        if name == "Goodreads.com" and i == 1:
80+
81
    stats = []
82
    percs = []
83
    for i, histo in enumerate(data):
84
        mean, std, frac = histo_stat(haxis, histo)
85-
    stats = np.array(stats)
85+
        if mean == 0:  # Special case with no data
86
            # Hard-coded stand-in for the row without data; where the value
            # 4.15 comes from is not shown here.
            mean = 4.15
87-
    xaxis = range(1, len(data)+1)
87+
88-
    plot.errorbar(xaxis, stats[:, 0], xerr=0.5, yerr=stats[:, 1], color=color, fmt='-o')
88+
89-
    plot.fill_between(xaxis, stats[:, 0] - stats[:, 1],  stats[:, 0] + stats[:, 1], facecolor=color, alpha=0.3)
89+
        # Fractions scaled to percentages, one list per row of data.
        percs += [[100 * fi for fi in frac]]
90
        print "{:>2}: {:.2f} +- {:.2f} [{}]".format(i + 1, mean, std, perc)
91-
run("Amazon.com", amazoncom, "red")
91+
    return np.array(stats), np.array(percs)
92-
run("Amazon.co.uk", amazoncouk, "blue")
92+
93-
run("Goodreads.com", goodreads, "green")
93+
94
def plot_stats(stats, color):
    """
    Plot statistics, can superimpose

    Parameters: stats - 2-D array with one row per book, where column 0 is
    drawn as the value and column 1 as the symmetric error around it,
    color - colour used for both the error bars and the shaded band
    """
    nbooks = stats.shape[0]
    centre = stats[:, 0]
    spread = stats[:, 1]
    books = range(1, nbooks + 1)  # books are numbered from 1

    # The x range is fixed to 1..13 rather than derived from the data.
    plot.subplot(1, 1, 1, xlim=(1, 13))
    plot.errorbar(
        books, centre, xerr=0.5, yerr=spread, color=color, fmt='-o')
    # Translucent band covering value - error up to value + error.
    plot.fill_between(
        books, centre - spread, centre + spread, facecolor=color, alpha=0.3)
    plot.ylabel("score")
    plot.xlabel("book")
107
108
109
def plot_percs(isub, percs, name):
    """
    Plot percentages as an image in a given subplot

    Parameters: isub - index of the panel in a grid of 3 rows and 1 column,
    percs - 2-D array that is transposed before being drawn,
    name - title of the panel

    Returns the image object created by imshow, e.g. for attaching a
    colorbar.
    """
    plot.subplot(3, 1, isub)
    grid = percs.T
    nrows, ncols = grid.shape[0], grid.shape[1]
    # The colour scale is pinned to 0..100 for every panel.
    im = plot.imshow(
        grid, extent=(1, ncols, 1, nrows),
        interpolation="nearest", clim=(0, 100))
    plot.title(name)
    plot.ylabel("score")
    plot.xlabel("book")
    return im
121
122
# Per-site statistics: one (stats, percs) pair for each rating table.
stats1, percs1 = run("Amazon.com", amazoncom)
stats2, percs2 = run("Amazon.co.uk", amazoncouk)
stats3, percs3 = run("Goodreads.com", goodreads)

# Figure 1: the statistics of the three sites, one colour per site.
fig1 = plot.figure(1)
plot_stats(stats1, "red")
plot_stats(stats2, "blue")
plot_stats(stats3, "green")

# Figure 2: one percentage image per site, stacked in three panels.
fig2 = plot.figure(2)
plot_percs(1, percs1, "Amazon.com")
plot_percs(2, percs2, "Amazon.co.uk")
im = plot_percs(3, percs3, "Goodreads.com")
# A single colorbar in its own axes, driven by the image of the last panel.
cax = fig2.add_axes([0.8, 0.1, 0.03, 0.85])
fig2.colorbar(im, cax=cax, label="Percentage")
fig2.tight_layout()
plot.show()