HasteBin0

Python standalone Welford's stdev [standard deviation] algorithm V0

Jun 3rd, 2024 (edited)
484
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.66 KB | Source Code | 0 0
  1. #!/usr/bin/python3
  2. from collections import deque
  3. from decimal import Decimal
  4. from statistics import mean as st_mean, fmean as st_fmean, variance as st_var, pvariance as st_pvar, stdev as st_stdev, pstdev as st_pstdev
  5. from typing import Sequence, NewType, Callable
  6.  
  7.  
  8. """
  9. This module provides custom statistical functions for calculating variance and standard deviation.
  10. One-pass algorithms for the mean and the standard deviation do exist; the best known is Welford's method.
  11. Includes both population and sample versions using a 2-pass method.
  12. """
  13.  
  14. # Type alias for a function that takes a sequence of floats and an optional float, and returns a float
  15. StatFunction = NewType('StatFunction', Callable[[Sequence[float], float | None], float])
  16.  
  17. class StatisticalFunctions:
  18.     """ Class providing static methods for various statistical calculations. """
  19.  
  20.     @classmethod
  21.     def float_pvariance(cls, data: Sequence[float], mean: float = None) -> float:
  22.         """ Population variance (similar to statistics.pvariance).  Uses floats.
  23.            @https://docs.python.org/3/library/statistics.html#statistics.pvariance
  24.            Return the population variance of data, a non-empty sequence or iterable of real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
  25.            If the optional second argument mu is given, it should be the population mean of the data. It can also be used to compute the second moment around a point that is not the mean. If it is missing or None (the default), the arithmetic mean is automatically calculated.
  26.            Use `this function to calculate the variance from the entire population. To estimate the variance from a sample, the variance() function is usually a better choice."""
  27.         if mean is not None:
  28.             return sum((x - mean) ** 2 for x in data) / len(data)
  29.         dps: deque[float] = deque(data)
  30.         return cls.float_pvariance(dps, st_fmean(dps))
  31.  
  32.  
  33.     @classmethod
  34.     def decimal_pvariance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  35.         """ Population variance (similar to statistics.pvariance).  Uses Decimals.
  36.            @https://docs.python.org/3/library/statistics.html#statistics.pvariance
  37.            Return the population variance of data, a non-empty sequence or iterable of real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
  38.            If the optional second argument mu is given, it should be the population mean of the data. It can also be used to compute the second moment around a point that is not the mean. If it is missing or None (the default), the arithmetic mean is automatically calculated.
  39.            Use `this function to calculate the variance from the entire population. To estimate the variance from a sample, the variance() function is usually a better choice."""
  40.         if mean is not None:
  41.             return sum((x - mean) ** 2 for x in data) / len(data)
  42.         dps: deque[Decimal] = deque(map(Decimal, data))
  43.         return cls.decimal_pvariance(dps, st_mean(dps))
  44.  
  45.     @classmethod
  46.     def float_pstdev(cls, data: Sequence[float], mean: float = None) -> float:
  47.         """ Population standard deviation (similar to statistics.pstdev).  Uses floats.
  48.            @https://docs.python.org/3/library/statistics.html#statistics.pstdev
  49.            Return the population standard deviation (the square root of the population variance). See pvariance() for arguments and other details."""
  50.         if mean is not None:
  51.             return (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
  52.         dps: deque[float] = deque(data)
  53.         return cls.float_pstdev(dps, st_fmean(dps))
  54.  
  55.     @classmethod
  56.     def decimal_pstdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  57.         """ Population standard deviation (similar to statistics.pstdev).  Uses Decimals.
  58.            @https://docs.python.org/3/library/statistics.html#statistics.pstdev
  59.            Return the population standard deviation (the square root of the population variance). See pvariance() for arguments and other details."""
  60.         if mean is not None:
  61.             return (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
  62.         dps: deque[Decimal] = deque(map(Decimal, data))
  63.         return cls.decimal_pstdev(dps, st_mean(dps))
  64.  
  65.     @classmethod
  66.     def float_variance(cls, data: Sequence[float], mean: float = None) -> float:
  67.         """ Sample variance (similar to statistics.variance).  Uses floats.
  68.            @https://docs.python.org/3/library/statistics.html#statistics.variance
  69.            Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
  70.            If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
  71.            Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
  72.         if mean is not None:
  73.             return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
  74.         dps: deque[float] = deque(data)
  75.         return cls.float_variance(dps, st_fmean(dps))
  76.  
  77.     @classmethod
  78.     def decimal_variance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  79.         """ Sample variance (similar to statistics.variance).  Uses Decimals.
  80.            @https://docs.python.org/3/library/statistics.html#statistics.variance
  81.            Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
  82.            If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
  83.            Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
  84.         if mean is not None:
  85.             return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
  86.         dps: deque[Decimal] = deque(map(Decimal, data))
  87.         return cls.decimal_variance(dps, st_mean(dps))
  88.  
  89.     @classmethod
  90.     def float_stdev(cls, data: Sequence[float], mean: float = None) -> float:
  91.         """ Sample standard deviation (similar to statistics.stdev).  Uses floats.
  92.            @https://docs.python.org/3/library/statistics.html#statistics.stdev
  93.            Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
  94.         if mean is not None:
  95.             return (sum((x - mean) ** 2 for x in data) / (len(data) - 1)) ** 0.5
  96.         dps: deque[float] = deque(data)
  97.         return cls.float_stdev(dps, st_fmean(dps))
  98.  
  99.     @classmethod
  100.     def decimal_stdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
  101.         """ Sample standard deviation (similar to statistics.stdev).  Uses Decimals.
  102.            @https://docs.python.org/3/library/statistics.html#statistics.stdev
  103.            Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
  104.         if mean is not None:
  105.             return (sum((x - mean) ** 2 for x in data) / (len(data) - 1)) ** 0.5
  106.         dps: deque[Decimal] = deque(map(Decimal, data))
  107.         return cls.decimal_stdev(dps, st_mean(dps))
  108.    
  109.     @classmethod
  110.     def complex_variance(cls, data: Sequence[complex], mean: complex = None) -> float:
  111.         """ Sample variance (similar to statistics.variance).  Uses complex floats.
  112.            @https://docs.python.org/3/library/statistics.html#statistics.variance
  113.            Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
  114.            If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
  115.            Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
  116.         if mean is not None:
  117.             return sum((diff := x - mean) * diff.conjugate() for x in data) / (len(data) - 1)
  118.         dps: deque[complex] = deque(map(complex, data))
  119.         return cls.complex_variance(dps, st_fmean(dps))
  120.  
  121.     @classmethod
  122.     def complex_stdev(cls, data: Sequence[complex], mean: complex = None) -> float:
  123.         """ Sample standard deviation (similar to statistics.stdev).  Uses complex floats.
  124.            @https://docs.python.org/3/library/statistics.html#statistics.stdev
  125.            Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
  126.         if mean is not None:
  127.             return (sum((diff := x - mean) * diff.conjugate() for x in data) / (len(data) - 1)) ** 0.5
  128.         dps: deque[complex] = deque(map(complex, data))
  129.         return cls.complex_stdev(dps, st_fmean(dps))
  130.  
  131.  
  132.  
  133. def one_pass_variance(data: Sequence[float], exact: bool) -> float:
  134.     if len(data) < 2:
  135.         return 0.0
  136.     K: float = data[0]
  137.     n: int = 0
  138.     Ex = Ex2 = 0.0
  139.     for x in data:
  140.         n += 1
  141.         Ex += x - K
  142.         Ex2 += (x - K) ** 2
  143.     # use n instead of (n-1) if want to compute the exact variance of the given data; or, use (n-1) if data are samples of a larger population.
  144.     variance = (Ex2 - Ex ** 2 / n) / (n - 1 * exact)
  145.     return variance
  146.  
  147.  
  148. def two_pass_variance(data: Sequence[float], exact: bool) -> float:
  149.     n: int = len(data)
  150.     mean: float = sum(data) / n
  151.     # use n instead of (n-1) if want to compute the exact variance of the given data; or, use (n-1) if data are samples of a larger population.
  152.     variance = sum((x - mean) ** 2 for x in data) / (n - 1 * exact)
  153.     return variance
  154.  
  155.  
  156. def two_pass_variance2(data: Sequence[float], exact: bool) -> float:
  157.     mean: float = st_mean(data)
  158.     variance = sum((x - mean) ** 2 for x in data) / (len(data) - 1 * exact)
  159.     return variance
  160.  
  161.  
  162. def two_pass_variance3(data: Sequence[float], exact: bool, mean: float = None) -> float:
  163.     return st_stdev(data, )
  164.  
  165.  
  166. class StdDev:
  167.     K: float
  168.     Ex: float
  169.     Ex2: float
  170.     n: int
  171.    
  172.     def __init__(self):
  173.         self.K = self.Ex = self.Ex2 = 0.0
  174.         self.n = 0
  175.  
  176.     def add_variable(self, x: float) -> None:
  177.         if self.n == 0:
  178.             self.K = x
  179.         self.n += 1
  180.         self.Ex += x - self.K
  181.         self.Ex2 += (x - self.K) ** 2
  182.  
  183.     def remove_variable(self, x: float) -> None:
  184.         self.n -= 1
  185.         self.Ex -= x - self.K
  186.         self.Ex2 -= (x - self.K) ** 2
  187.  
  188.     @property
  189.     def mean(self) -> float:
  190.         return self.K + self.Ex / self.n
  191.  
  192.     @property
  193.     def variance(self) -> float:
  194.         return (self.Ex2 - self.Ex ** 2 / self.n) / (self.n - 1)
  195.  
  196.  
Advertisement
Add Comment
Please, Sign In to add comment