Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Calculate stats for the Wheel of Time series
Each row is the number of 5,4,3,2,1 ratings for a book in the series
Collected from goodreads.com, amazon.com and amazon.co.uk
"""
# goodreads.com rating counts: one row per book (1-13); the columns are the
# number of 5-, 4-, 3-, 2- and 1-star ratings, in that order.
# A row of negative counts marks a book without data (its sum is negative,
# which histo_stat() treats as "no data").
goodreads = [
    [45132, 32768, 15394, 4462, 2287],  # 1
    [-1, -1, -1, -1, -1],  # 2 (no data)
    [31992, 26753, 11780, 2265, 795],  # 3
    [24523, 21922, 10282, 1747, 430],  # 4
    [16119, 16903, 9131, 1749, 371],  # 5
    [12290, 12332, 7866, 1750, 399],  # 6
    [9925, 11312, 8258, 2068, 451],  # 7
    [8841, 9805, 8043, 2511, 542],  # 8
    [8891, 9205, 7464, 2438, 622],  # 9
    [7411, 7549, 6040, 2272, 864],  # 10
    [8959, 8456, 4699, 1202, 321],  # 11
    [14569, 9046, 3169, 676, 363],  # 12
    [14147, 7671, 2337, 451, 292],  # 13
]
# amazon.com rating counts: one row per book (1-13); the columns are the
# number of 5-, 4-, 3-, 2- and 1-star ratings, in that order.
amazoncom = [
    [1279, 404, 188, 138, 157],  # 1
    [393, 139, 37, 13, 15],  # 2
    [292, 122, 35, 23, 17],  # 3
    [275, 93, 39, 17, 19],  # 4
    [204, 97, 54, 27, 24],  # 5
    [227, 83, 55, 35, 35],  # 6
    [335, 187, 116, 73, 54],  # 7
    [361, 330, 379, 352, 466],  # 8
    [331, 302, 191, 184, 185],  # 9
    [199, 146, 253, 396, 1581],  # 10
    [184, 155, 108, 81, 111],  # 11
    [540, 149, 31, 22, 14],  # 12
    [1628, 346, 150, 62, 358],  # 13
]
# amazon.co.uk rating counts: one row per book (1-13); the columns are the
# number of 5-, 4-, 3-, 2- and 1-star ratings, in that order.
amazoncouk = [
    [144, 47, 20, 19, 20],  # 1
    [33, 25, 4, 0, 1],  # 2
    [46, 25, 8, 0, 5],  # 3
    [32, 12, 7, 0, 1],  # 4
    [21, 17, 5, 5, 4],  # 5
    [25, 10, 10, 1, 2],  # 6
    [19, 12, 2, 0, 0],  # 7
    [20, 21, 22, 16, 19],  # 8
    [44, 33, 26, 9, 10],  # 9
    [20, 18, 32, 60, 76],  # 10
    [38, 42, 17, 6, 5],  # 11
    [125, 22, 6, 3, 1],  # 12
    [96, 31, 8, 8, 5],  # 13
]
import math

import numpy as np
from matplotlib import pyplot as plot
def histo_stat(haxis, histo):
    """
    Compute the weighted mean, standard deviation and bin fractions.

    Parameters: haxis - x axis (the value of each bin),
                histo - weight for each entry in haxis
    Return: mean, standard deviation and bin fractions, where
            fraction i is histo[i] / sum(histo).
            If the total weight is not positive (all-zero histogram or a
            row of negative "no data" markers) the result is
            0, 0, [0] * len(haxis).
    """
    sumw = sum(histo)
    # "<=" rather than "<": a total weight of exactly zero would otherwise
    # fall through to the divisions below and raise ZeroDivisionError.
    if sumw <= 0:
        return 0, 0, [0] * len(haxis)
    # The pairs are traversed twice (mean, then std).  zip() returns a
    # one-shot iterator on Python 3, so materialize it; otherwise the second
    # pass sees nothing and std silently becomes 0.
    pairs = list(zip(histo, haxis))
    mean = 1. / sumw * sum(w * x for w, x in pairs)
    std = math.sqrt(1. / sumw * sum(w * (x - mean) ** 2 for w, x in pairs))
    frac = [float(h) / sumw for h in histo]
    return mean, std, frac
def run(name, data):
    """
    Print per-book statistics for one dataset and collect them as arrays.

    Parameters: name - dataset name (printed as a header line),
                data - table with histograms, one row per book
    Return: two numpy arrays: one (mean, std) row per book, and one row per
            book with the bin fractions expressed in percent
    """
    # Kept from the original: selects (and creates if needed) figure 1.
    plot.figure(1)
    print("----{}----".format(name))
    haxis = range(5, 0, -1)  # bin values matching the 5,4,3,2,1 column order
    stats = []
    percs = []
    for i, histo in enumerate(data):
        mean, std, frac = histo_stat(haxis, histo)
        if mean == 0:  # Special case with no data
            # Placeholder mean for a book without ratings; std stays 0.
            # NOTE(review): the origin of 4.15 is not visible here - confirm.
            mean = 4.15
        stats.append((mean, std))
        perc = ",".join("{:>6.1%}".format(fi) for fi in frac)
        percs.append([100 * fi for fi in frac])
        # Call form of print: valid on both Python 2 and Python 3.
        print("{:>2}: {:.2f} +- {:.2f} [{}]".format(i + 1, mean, std, perc))
    return np.array(stats), np.array(percs)
def plot_stats(stats, color):
    """
    Plot statistics, can superimpose

    Parameters: stats - array with one (mean, std) row per book,
                color - matplotlib color used for the line and the band
    Draws the mean per book with error bars (x error 0.5, y error std) and a
    translucent band from mean - std to mean + std on the current figure.
    """
    nx = stats.shape[0]
    # The x range is fixed to books 1..13 regardless of nx.
    plot.subplot(1, 1, 1, xlim=(1, 13))
    xaxis = range(1, nx + 1)
    mean = stats[:, 0]
    std = stats[:, 1]
    plot.errorbar(xaxis, mean, xerr=0.5, yerr=std, color=color, fmt='-o')
    plot.fill_between(
        xaxis, mean - std, mean + std, facecolor=color, alpha=0.3)
    plot.ylabel("score")
    plot.xlabel("book")
def plot_percs(isub, percs, name):
    """
    Plot percentages as an image in a given subplot

    Parameters: isub - index of the subplot in a 3 x 1 grid,
                percs - array of percentages, one row per book,
                name - title of the subplot
    Return: the image object returned by imshow (usable for a colorbar)
    """
    plot.subplot(3, 1, isub)
    # Transpose so that books run along the x axis of the image.
    grid = percs.T
    im = plot.imshow(
        grid, extent=(1, grid.shape[1], 1, grid.shape[0]),
        interpolation="nearest", clim=(0, 100))
    plot.title(name)
    plot.ylabel("score")
    plot.xlabel("book")
    return im
# Compute and print the statistics for each data source.
stats1, percs1 = run("Amazon.com", amazoncom)
stats2, percs2 = run("Amazon.co.uk", amazoncouk)
stats3, percs3 = run("Goodreads.com", goodreads)

# Figure 1: mean score per book with std band, all sources superimposed.
fig1 = plot.figure(1)
plot_stats(stats1, "red")
plot_stats(stats2, "blue")
plot_stats(stats3, "green")

# Figure 2: rating percentages as images, one subplot per source.
fig2 = plot.figure(2)
plot_percs(1, percs1, "Amazon.com")
plot_percs(2, percs2, "Amazon.co.uk")
im = plot_percs(3, percs3, "Goodreads.com")
# Dedicated axes for a colorbar; all images use the same 0-100 color limits,
# so the last image can stand in for all three.
cax = fig2.add_axes([0.8, 0.1, 0.03, 0.85])
fig2.colorbar(im, cax=cax, label="Percentage")
fig2.tight_layout()
plot.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement