HasteBin0

Python standalone Welford's stdev [standard deviation] algorithm V1

Jun 5th, 2024 (edited)
468
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.37 KB | Source Code | 0 0
  1. #!/usr/bin/python3
  2. from collections import deque
  3. from decimal import Decimal
  4. from statistics import mean as st_mean, fmean as st_fmean, variance as st_var, stdev as st_stdev, pstdev as st_pstdev
  5. from typing import Sequence, Callable
  6.  
  7.  
  8. """
  9. This module provides custom statistical functions for calculating variance and standard deviation.
  10. Is there a one-pass algorithm for the mean and/or the standard deviation? Yes: it is called Welford's method.
  11. Includes both population and sample versions using a 2-pass method.
  12. """
  13.  
# Type alias for a statistic callable: it takes a sequence of floats and a second
# argument that is either a float or None, and returns a float.
# NOTE(review): the second argument presumably mirrors the optional ``mean``
# parameter of the float_* methods below; the alias is not referenced in the
# visible code, so confirm against callers.
StatFunction = Callable[[Sequence[float], float | None], float]
  16.  
  17. class StatisticalFunctions:
  18.     """Class providing static methods for various statistical calculations."""
  19.  
  20.     @classmethod
  21.     def float_pvariance(cls, data: Sequence[float], mean: float = None) -> float:
  22.         """ Population variance (similar to statistics.pvariance). Uses floats.
  23.            https://docs.python.org/3/library/statistics.html#statistics.pvariance
  24.        """
  25.         if mean is None:
  26.             mean = st_fmean(data)
  27.         return sum((x - mean) ** 2 for x in data) / len(data)
  28.  
  29.     @classmethod
  30.     def decimal_pvariance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  31.         """ Population variance (similar to statistics.pvariance). Uses Decimals.
  32.            https://docs.python.org/3/library/statistics.html#statistics.pvariance
  33.        """
  34.         if mean is None:
  35.             mean = st_mean(data)
  36.         return sum((x - mean) ** 2 for x in data) / len(data)
  37.  
  38.     @classmethod
  39.     def float_pstdev(cls, data: Sequence[float], mean: float = None) -> float:
  40.         """ Population standard deviation (similar to statistics.pstdev). Uses floats.
  41.            https://docs.python.org/3/library/statistics.html#statistics.pstdev
  42.        """
  43.         variance = cls.float_pvariance(data, mean)
  44.         return variance ** 0.5
  45.  
  46.     @classmethod
  47.     def decimal_pstdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  48.         """ Population standard deviation (similar to statistics.pstdev). Uses Decimals.
  49.            https://docs.python.org/3/library/statistics.html#statistics.pstdev
  50.        """
  51.         variance = cls.decimal_pvariance(data, mean)
  52.         return variance ** 0.5
  53.  
  54.     @classmethod
  55.     def float_variance(cls, data: Sequence[float], mean: float = None) -> float:
  56.         """ Sample variance (similar to statistics.variance). Uses floats.
  57.            https://docs.python.org/3/library/statistics.html#statistics.variance
  58.        """
  59.         if mean is None:
  60.             mean = st_fmean(data)
  61.         return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
  62.  
  63.     @classmethod
  64.     def decimal_variance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  65.         """ Sample variance (similar to statistics.variance). Uses Decimals.
  66.            https://docs.python.org/3/library/statistics.html#statistics.variance
  67.        """
  68.         if mean is None:
  69.             mean = st_mean(data)
  70.         return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
  71.  
  72.     @classmethod
  73.     def float_stdev(cls, data: Sequence[float], mean: float = None) -> float:
  74.         """ Sample standard deviation (similar to statistics.stdev). Uses floats.
  75.            https://docs.python.org/3/library/statistics.html#statistics.stdev
  76.        """
  77.         variance = cls.float_variance(data, mean)
  78.         return variance ** 0.5
  79.  
  80.     @classmethod
  81.     def decimal_stdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  82.         """ Sample standard deviation (similar to statistics.stdev). Uses Decimals.
  83.            https://docs.python.org/3/library/statistics.html#statistics.stdev
  84.        """
  85.         variance = cls.decimal_variance(data, mean)
  86.         return variance ** 0.5
  87.  
  88.     @classmethod
  89.     def complex_variance(cls, data: Sequence[complex], mean: complex = None) -> complex:
  90.         """ Sample variance (similar to statistics.variance). Uses complex numbers.
  91.            https://docs.python.org/3/library/statistics.html#statistics.variance
  92.        """
  93.         if mean is None:
  94.             mean = sum(data) / len(data)
  95.         return sum((x - mean) * (x - mean).conjugate() for x in data) / (len(data) - 1)
  96.  
  97.     @classmethod
  98.     def complex_stdev(cls, data: Sequence[complex], mean: complex = None) -> complex:
  99.         """ Sample standard deviation (similar to statistics.stdev). Uses complex numbers.
  100.            https://docs.python.org/3/library/statistics.html#statistics.stdev
  101.        """
  102.         variance = cls.complex_variance(data, mean)
  103.         return variance ** 0.5
  104.  
  105.  
  106. def one_pass_variance(data: Sequence[float], exact: bool) -> float:
  107.     if len(data) < 2:
  108.         return 0.0
  109.     K: float = data[0]
  110.     n: int = 0
  111.     Ex = Ex2 = 0.0
  112.     for x in data:
  113.         n += 1
  114.         Ex += x - K
  115.         Ex2 += (x - K) ** 2
  116.     variance = (Ex2 - Ex ** 2 / n) / (n - 1 if exact else n)
  117.     return variance
  118.  
  119.  
  120. def two_pass_variance(data: Sequence[float], exact: bool) -> float:
  121.     n: int = len(data)
  122.     mean: float = sum(data) / n
  123.     variance = sum((x - mean) ** 2 for x in data) / (n - 1 if exact else n)
  124.     return variance
  125.  
  126.  
  127. def two_pass_variance2(data: Sequence[float], exact: bool) -> float:
  128.     mean: float = st_mean(data)
  129.     variance = sum((x - mean) ** 2 for x in data) / (len(data) - 1 if exact else len(data))
  130.     return variance
  131.  
  132.  
  133. def two_pass_variance3(data: Sequence[float], exact: bool, mean: float = None) -> float:
  134.     if mean is None:
  135.         mean = st_mean(data)
  136.     variance = sum((x - mean) ** 2 for x in data) / (len(data) - 1 if exact else len(data))
  137.     return variance
  138.  
  139.  
  140. class StdDev:
  141.     K: float
  142.     Ex: float
  143.     Ex2: float
  144.     n: int
  145.  
  146.     def __init__(self):
  147.         self.K = self.Ex = self.Ex2 = 0.0
  148.         self.n = 0
  149.  
  150.     def add_variable(self, x: float) -> None:
  151.         if self.n == 0:
  152.             self.K = x
  153.         self.n += 1
  154.         self.Ex += x - self.K
  155.         self.Ex2 += (x - self.K) ** 2
  156.  
  157.     def remove_variable(self, x: float) -> None:
  158.         self.n -= 1
  159.         self.Ex -= x - self.K
  160.         self.Ex2 -= (x - self.K) ** 2
  161.  
  162.     @property
  163.     def mean(self) -> float:
  164.         return self.K + self.Ex / self.n
  165.  
  166.     @property
  167.     def variance(self) -> float:
  168.         return (self.Ex2 - self.Ex ** 2 / self.n) / (self.n - 1 if self.n > 1 else self.n)
  169.  
  170.  
Advertisement
Add Comment
Please, Sign In to add comment