View difference between Paste ID: DReBL56T and
SHOW: | | - or go back to the newest paste.
1-
1+
"""A library for executing running calculations.
2
3
A running calculation is an object that can be fed one value at a time. This
4
allows running several running calculations on a single iterator of values in
5
parallel. This isn't possible with the built-in variants of most calculations,
6
such as max() and heapq.nlargest().
7
8
"""
9
10
from math import sqrt
11
from heapq import heappush, heappushpop
12
from functools import partial
13
14
15
class RunningCalc(object):
16
    pass
17
18
def apply(iterable, *running_calcs):
19
    """Run several running calculations on a single iterable of values."""
20
    feeds = [rcalc.feed for rcalc in running_calcs]
21
    for value in iterable:
22
        for rcalc_feed in running_calcs:
23
            rcalc_feed(value)
24
    return tuple([rcalc.value for rcalc in running_calcs])
25
26
class RunningMax(RunningCalc):
27
    def __init__(self):
28
        self.value = None
29
30
    def feed(self, value):
31
        if self.value is None or value > self.value:
32
            self.value = value
33
34
class RunningMin(RunningCalc):
35
    def __init__(self):
36
        self.value = None
37
38
    def feed(self, value):
39
        if self.value is None or value < self.value:
40
            self.value = value
41
42
class RunningCount(RunningCalc):
43
    def __init__(self, initial_value=0):
44
        self.value = initial_value
45
46
    def feed(self, value):
47
        self.value += 1
48
49
class RunningSum(RunningCalc):
50
    def __init__(self, initial_value=0):
51
        self.value = initial_value
52
53
    def feed(self, value):
54
        self.value += value
55
56
class RunningAverage(RunningCalc):
57
    def __init__(self):
58
        self.value = 0.0
59
        self.n = 0
60
61
    def feed(self, value):
62
        self.n += 1
63
        self.value += (value - self.value) / self.n
64
65
class RunningVariance(RunningCalc):
66
    """calculate a running variance using the Welford algorithm"""
67
    def __init__(self):
68
        self.n = 0
69
        self.mean = 0.0
70
        self.M2 = 0.0
71
72
    def feed(self, value):
73
        self.n += 1
74
        delta = value - mean
75
        self.mean += delta / n
76
        self.M2 += delta * (value - self.mean) # uses the new value of mean!
77
78
    @property
79
    def populationVariance(self):
80
        return (self.M2 / self.n) if self.n > 0 else 0
81
    value = populationVariance
82
83
    @property
84
    def sampleVariance(self):
85
        return (self.M2 / (self.n - 1)) if self.n > 1 else 0
86
87
def RunningStandardDeviation(RunningCalc):
88
    def __init__(self):
89
        self._running_variance = RunningVariance()
90
91
    def feed(self, value):
92
        self._running_variance.feed(value)
93
94
    @property
95
    def populationStandardDeviation(self):
96
        return sqrt(self._running_variance.populationVariance)
97
    value = populationStandardDeviation    
98
99
    @property
100
    def samplepopulationStandardDeviation(self):
101
        return sqrt(self._running_variance.sampleVariance)
102
103
class RunningNLargest(RunningCalc):
104
    def __init__(self, N):
105
        self.heap = []
106
        self.count = 0
107
        self.N = N
108
        
109
    def feed(self, value):
110
        self.count += 1
111
        if self.count <= self.N:
112
            heappush(self.heap, value)
113
        else:
114
            heappushpop(self.heap, value)
115
            
116
    @property
117
    def value(self):
118
        return sorted(self.heap, reversed=True)
119
120
class RunningNSmallest(RunningNLargest):
121
    """Only works on negatable values!"""
122
    # Why isn't there a built-in max-heap? :(
123
    def feed(self, value):
124
        RunningNLargest.feed(self, -value) # note the minus!
125
126
    @property
127
    def value(self):
128
        return sorted([-x for x in self.heap])