Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python3
- from collections import deque
- from decimal import Decimal
- from statistics import mean as st_mean, fmean as st_fmean, variance as st_var, pvariance as st_pvar, stdev as st_stdev, pstdev as st_pstdev
- from typing import Sequence, NewType, Callable
- """
- This module provides custom statistical functions for calculating variance and standard deviation.
- A one-pass way of computing the mean and/or the standard deviation also exists; it is known as Welford's method.
- Includes both population and sample versions computed with a two-pass method.
- """
- # Type alias for a function that takes a sequence of floats and an optional float, and returns a float
- StatFunction = NewType('StatFunction', Callable[[Sequence[float], float | None], float])
- class StatisticalFunctions:
- """ Class providing static methods for various statistical calculations. """
- @classmethod
- def float_pvariance(cls, data: Sequence[float], mean: float = None) -> float:
- """ Population variance (similar to statistics.pvariance). Uses floats.
- @https://docs.python.org/3/library/statistics.html#statistics.pvariance
- Return the population variance of data, a non-empty sequence or iterable of real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
- If the optional second argument mu is given, it should be the population mean of the data. It can also be used to compute the second moment around a point that is not the mean. If it is missing or None (the default), the arithmetic mean is automatically calculated.
- Use `this function to calculate the variance from the entire population. To estimate the variance from a sample, the variance() function is usually a better choice."""
- if mean is not None:
- return sum((x - mean) ** 2 for x in data) / len(data)
- dps: deque[float] = deque(data)
- return cls.float_pvariance(dps, st_fmean(dps))
- @classmethod
- def decimal_pvariance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
- """ Population variance (similar to statistics.pvariance). Uses Decimals.
- @https://docs.python.org/3/library/statistics.html#statistics.pvariance
- Return the population variance of data, a non-empty sequence or iterable of real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
- If the optional second argument mu is given, it should be the population mean of the data. It can also be used to compute the second moment around a point that is not the mean. If it is missing or None (the default), the arithmetic mean is automatically calculated.
- Use `this function to calculate the variance from the entire population. To estimate the variance from a sample, the variance() function is usually a better choice."""
- if mean is not None:
- return sum((x - mean) ** 2 for x in data) / len(data)
- dps: deque[Decimal] = deque(map(Decimal, data))
- return cls.decimal_pvariance(dps, st_mean(dps))
- @classmethod
- def float_pstdev(cls, data: Sequence[float], mean: float = None) -> float:
- """ Population standard deviation (similar to statistics.pstdev). Uses floats.
- @https://docs.python.org/3/library/statistics.html#statistics.pstdev
- Return the population standard deviation (the square root of the population variance). See pvariance() for arguments and other details."""
- if mean is not None:
- return (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
- dps: deque[float] = deque(data)
- return cls.float_pstdev(dps, st_fmean(dps))
- @classmethod
- def decimal_pstdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
- """ Population standard deviation (similar to statistics.pstdev). Uses Decimals.
- @https://docs.python.org/3/library/statistics.html#statistics.pstdev
- Return the population standard deviation (the square root of the population variance). See pvariance() for arguments and other details."""
- if mean is not None:
- return (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
- dps: deque[Decimal] = deque(map(Decimal, data))
- return cls.decimal_pstdev(dps, st_mean(dps))
- @classmethod
- def float_variance(cls, data: Sequence[float], mean: float = None) -> float:
- """ Sample variance (similar to statistics.variance). Uses floats.
- @https://docs.python.org/3/library/statistics.html#statistics.variance
- Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
- If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
- Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
- if mean is not None:
- return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
- dps: deque[float] = deque(data)
- return cls.float_variance(dps, st_fmean(dps))
- @classmethod
- def decimal_variance(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
- """ Sample variance (similar to statistics.variance). Uses Decimals.
- @https://docs.python.org/3/library/statistics.html#statistics.variance
- Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
- If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
- Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
- if mean is not None:
- return sum((x - mean) ** 2 for x in data) / (len(data) - 1)
- dps: deque[Decimal] = deque(map(Decimal, data))
- return cls.decimal_variance(dps, st_mean(dps))
- @classmethod
- def float_stdev(cls, data: Sequence[float], mean: float = None) -> float:
- """ Sample standard deviation (similar to statistics.stdev). Uses floats.
- @https://docs.python.org/3/library/statistics.html#statistics.stdev
- Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
- if mean is not None:
- return (sum((x - mean) ** 2 for x in data) / (len(data) - 1)) ** 0.5
- dps: deque[float] = deque(data)
- return cls.float_stdev(dps, st_fmean(dps))
- @classmethod
- def decimal_stdev(cls, data: Sequence[Decimal], mean: Decimal = None) -> Decimal:
- """ Sample standard deviation (similar to statistics.stdev). Uses Decimals.
- @https://docs.python.org/3/library/statistics.html#statistics.stdev
- Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
- if mean is not None:
- return (sum((x - mean) ** 2 for x in data) / (len(data) - 1)) ** 0.5
- dps: deque[Decimal] = deque(map(Decimal, data))
- return cls.decimal_stdev(dps, st_mean(dps))
- @classmethod
- def complex_variance(cls, data: Sequence[complex], mean: complex = None) -> float:
- """ Sample variance (similar to statistics.variance). Uses complex floats.
- @https://docs.python.org/3/library/statistics.html#statistics.variance
- Return the sample variance of data, an iterable of at least two real-valued numbers. Variance, or second moment about the mean, is a measure of the variability (spread or dispersion) of data. A large variance indicates that the data is spread out; a small variance indicates it is clustered closely around the mean.
- If the optional second argument xbar is given, it should be the sample mean of data. If it is missing or None (the default), the mean is automatically calculated.
- Use this function when your data is a sample from a population. To calculate the variance from the entire population, see pvariance()."""
- if mean is not None:
- return sum((diff := x - mean) * diff.conjugate() for x in data) / (len(data) - 1)
- dps: deque[complex] = deque(map(complex, data))
- return cls.complex_variance(dps, st_fmean(dps))
- @classmethod
- def complex_stdev(cls, data: Sequence[complex], mean: complex = None) -> float:
- """ Sample standard deviation (similar to statistics.stdev). Uses complex floats.
- @https://docs.python.org/3/library/statistics.html#statistics.stdev
- Return the sample standard deviation (the square root of the sample variance). See variance() for arguments and other details."""
- if mean is not None:
- return (sum((diff := x - mean) * diff.conjugate() for x in data) / (len(data) - 1)) ** 0.5
- dps: deque[complex] = deque(map(complex, data))
- return cls.complex_stdev(dps, st_fmean(dps))
def one_pass_variance(data: Sequence[float], exact: bool) -> float:
    """Return the variance of ``data`` computed in a single traversal.

    Every value is offset by the first element before the running sum and
    running sum of squares are accumulated; the variance is then derived from
    those two totals.

    Args:
        data: Indexable sequence of real numbers.
        exact: ``True`` to treat ``data`` as the whole population (divide by
            ``n``); ``False`` to treat it as a sample of a larger population
            (divide by ``n - 1``).

    Returns:
        The variance, or ``0.0`` when ``data`` has fewer than two elements.
    """
    if len(data) < 2:
        return 0.0

    shift: float = data[0]
    count: int = 0
    total = total_sq = 0.0
    for x in data:
        count += 1
        total += x - shift
        total_sq += (x - shift) ** 2

    # Population variance divides by n, sample variance by n - 1. The former
    # expression ``n - 1 * exact`` had the two cases swapped.
    divisor = count if exact else count - 1
    return (total_sq - total ** 2 / count) / divisor
def two_pass_variance(data: Sequence[float], exact: bool) -> float:
    """Return the variance of ``data`` using a two-pass computation.

    The first pass computes the arithmetic mean with the built-in ``sum``;
    the second sums the squared deviations from that mean.

    Args:
        data: Non-empty sequence of real numbers.
        exact: ``True`` to treat ``data`` as the whole population (divide by
            ``n``); ``False`` to treat it as a sample of a larger population
            (divide by ``n - 1``).

    Returns:
        The population or sample variance.

    Raises:
        ZeroDivisionError: ``data`` is empty, or holds a single value while
            ``exact`` is ``False``.
    """
    n: int = len(data)
    mean: float = sum(data) / n
    # Population variance divides by n, sample variance by n - 1. The former
    # expression ``n - 1 * exact`` had the two cases swapped.
    divisor = n if exact else n - 1
    return sum((x - mean) ** 2 for x in data) / divisor
def two_pass_variance2(data: Sequence[float], exact: bool) -> float:
    """Return the variance of ``data``, taking the mean from ``statistics.mean``.

    Args:
        data: Non-empty sequence of real numbers.
        exact: ``True`` to treat ``data`` as the whole population (divide by
            ``n``); ``False`` to treat it as a sample of a larger population
            (divide by ``n - 1``).

    Returns:
        The population or sample variance.

    Raises:
        statistics.StatisticsError: ``data`` is empty.
        ZeroDivisionError: ``data`` holds a single value while ``exact`` is
            ``False``.
    """
    mean: float = st_mean(data)
    n = len(data)
    # Population variance divides by n, sample variance by n - 1. The former
    # expression ``len(data) - 1 * exact`` had the two cases swapped.
    divisor = n if exact else n - 1
    return sum((x - mean) ** 2 for x in data) / divisor
- def two_pass_variance3(data: Sequence[float], exact: bool, mean: float = None) -> float:
- return st_stdev(data, )
class StdDev:
    """Running accumulator for the mean and sample variance of a data stream.

    Values can be added and removed one at a time. Internally every value is
    offset by ``K`` (the first value added) and only the sum of the offsets,
    the sum of their squares and the count are kept.
    """

    K: float    # offset subtracted from every value; set by the first add
    Ex: float   # running sum of (x - K)
    Ex2: float  # running sum of (x - K) ** 2
    n: int      # number of values currently accounted for

    def __init__(self):
        """Create an empty accumulator."""
        self.K = self.Ex = self.Ex2 = 0.0
        self.n = 0

    def add_variable(self, x: float) -> None:
        """Account for one more observation ``x``."""
        if self.n == 0:
            self.K = x
            # A new offset starts a new series: drop any rounding residue
            # left in the sums by earlier add/remove cycles, because it was
            # accumulated relative to the previous offset.
            self.Ex = self.Ex2 = 0.0
        self.n += 1
        self.Ex += x - self.K
        self.Ex2 += (x - self.K) ** 2

    def remove_variable(self, x: float) -> None:
        """Stop accounting for a previously added observation ``x``.

        Raises:
            ValueError: the accumulator is empty, so there is nothing to
                remove (previously this silently made ``n`` negative).
        """
        if self.n <= 0:
            raise ValueError("cannot remove a value from an empty StdDev")
        self.n -= 1
        self.Ex -= x - self.K
        self.Ex2 -= (x - self.K) ** 2

    @property
    def mean(self) -> float:
        """Arithmetic mean of the current values.

        Raises:
            ZeroDivisionError: no values are currently held.
        """
        return self.K + self.Ex / self.n

    @property
    def variance(self) -> float:
        """Sample variance (divisor ``n - 1``) of the current values.

        Raises:
            ZeroDivisionError: fewer than two values are currently held.
        """
        return (self.Ex2 - self.Ex ** 2 / self.n) / (self.n - 1)
Advertisement
Add Comment
Please, Sign In to add comment