View difference between Paste ID: DReBL56T and
SHOW:
|
|
- or go back to the newest paste.
1 | - | |
1 | + | """A library for executing running calculations. |
2 | ||
3 | A running calculation is an object that can be fed one value at a time. This | |
4 | allows running several running calculations on a single iterator of values in | |
5 | parallel. This isn't possible with the built-in variants of most calculations, | |
6 | such as max() and heapq.nlargest(). | |
7 | ||
8 | """ | |
9 | ||
10 | from math import sqrt | |
11 | from heapq import heappush, heappushpop | |
12 | from functools import partial | |
13 | ||
14 | ||
15 | class RunningCalc(object): | |
16 | pass | |
17 | ||
18 | def apply(iterable, *running_calcs): | |
19 | """Run several running calculations on a single iterable of values.""" | |
20 | feeds = [rcalc.feed for rcalc in running_calcs] | |
21 | for value in iterable: | |
22 | for rcalc_feed in running_calcs: | |
23 | rcalc_feed(value) | |
24 | return tuple([rcalc.value for rcalc in running_calcs]) | |
25 | ||
26 | class RunningMax(RunningCalc): | |
27 | def __init__(self): | |
28 | self.value = None | |
29 | ||
30 | def feed(self, value): | |
31 | if self.value is None or value > self.value: | |
32 | self.value = value | |
33 | ||
34 | class RunningMin(RunningCalc): | |
35 | def __init__(self): | |
36 | self.value = None | |
37 | ||
38 | def feed(self, value): | |
39 | if self.value is None or value < self.value: | |
40 | self.value = value | |
41 | ||
42 | class RunningCount(RunningCalc): | |
43 | def __init__(self, initial_value=0): | |
44 | self.value = initial_value | |
45 | ||
46 | def feed(self, value): | |
47 | self.value += 1 | |
48 | ||
49 | class RunningSum(RunningCalc): | |
50 | def __init__(self, initial_value=0): | |
51 | self.value = initial_value | |
52 | ||
53 | def feed(self, value): | |
54 | self.value += value | |
55 | ||
56 | class RunningAverage(RunningCalc): | |
57 | def __init__(self): | |
58 | self.value = 0.0 | |
59 | self.n = 0 | |
60 | ||
61 | def feed(self, value): | |
62 | self.n += 1 | |
63 | self.value += (value - self.value) / self.n | |
64 | ||
65 | class RunningVariance(RunningCalc): | |
66 | """calculate a running variance using the Welford algorithm""" | |
67 | def __init__(self): | |
68 | self.n = 0 | |
69 | self.mean = 0.0 | |
70 | self.M2 = 0.0 | |
71 | ||
72 | def feed(self, value): | |
73 | self.n += 1 | |
74 | delta = value - mean | |
75 | self.mean += delta / n | |
76 | self.M2 += delta * (value - self.mean) # uses the new value of mean! | |
77 | ||
78 | @property | |
79 | def populationVariance(self): | |
80 | return (self.M2 / self.n) if self.n > 0 else 0 | |
81 | value = populationVariance | |
82 | ||
83 | @property | |
84 | def sampleVariance(self): | |
85 | return (self.M2 / (self.n - 1)) if self.n > 1 else 0 | |
86 | ||
87 | def RunningStandardDeviation(RunningCalc): | |
88 | def __init__(self): | |
89 | self._running_variance = RunningVariance() | |
90 | ||
91 | def feed(self, value): | |
92 | self._running_variance.feed(value) | |
93 | ||
94 | @property | |
95 | def populationStandardDeviation(self): | |
96 | return sqrt(self._running_variance.populationVariance) | |
97 | value = populationStandardDeviation | |
98 | ||
99 | @property | |
100 | def samplepopulationStandardDeviation(self): | |
101 | return sqrt(self._running_variance.sampleVariance) | |
102 | ||
103 | class RunningNLargest(RunningCalc): | |
104 | def __init__(self, N): | |
105 | self.heap = [] | |
106 | self.count = 0 | |
107 | self.N = N | |
108 | ||
109 | def feed(self, value): | |
110 | self.count += 1 | |
111 | if self.count <= self.N: | |
112 | heappush(self.heap, value) | |
113 | else: | |
114 | heappushpop(self.heap, value) | |
115 | ||
116 | @property | |
117 | def value(self): | |
118 | return sorted(self.heap, reversed=True) | |
119 | ||
120 | class RunningNSmallest(RunningNLargest): | |
121 | """Only works on negatable values!""" | |
122 | # Why isn't there a built-in max-heap? :( | |
123 | def feed(self, value): | |
124 | RunningNLargest.feed(self, -value) # note the minus! | |
125 | ||
126 | @property | |
127 | def value(self): | |
128 | return sorted([-x for x in self.heap]) |