Advertisement
krusader74

chisq.py

Sep 12th, 2017
557
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.69 KB | None | 0 0
  1. #!/usr/bin/env python
  2. """Chi-square Power, effect size and sample size calculations."""
  3. from scipy.stats import chi2
  4. from scipy.stats import ncx2
  5. from scipy.optimize import fsolve
  6. from math import sqrt
  7.  
  8. def effect_size(H_1, H_0=None):
  9.     """Effect size for Chi-square tests.
  10.  
  11.     Let :math:`(X_1, X_2, \\ldots, X_k)` be a multinomial random variable with
  12.     parameters :math:`p_1, p_2, \\ldots, p_k`. Suppose we wish to test
  13.  
  14.     .. math:: H_0: p_i = p_{i,0}, \\qquad i = 1, 2, \\ldots, k
  15.  
  16.     against :math:`H_1`: not all :math:`p`'s given by :math:`H_0`.
  17.  
  18.     Then the effect size :math:`w` is given by
  19.  
  20.     .. math:: w = \\sqrt{\\sum_{i=1}^k \\frac{(p_{i,1} - p_{i,0})^2}{p_{i,0}}}
  21.  
  22.     :param H_1: Alternative hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0.
  23.     :param H_0: Optional Null hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0. Must be same length as H_1. Defaults to equiprobable distribution: [1.0/float(len(H_1))]*len(H_1).
  24.     :returns: Effect size
  25.     """
  26.     k = len(H_1)
  27.     # Default H_0 is equiprobable outcomes:
  28.     if H_0 is None:
  29.         H_0 = [1.0/float(k)]*k
  30.     return sqrt(sum([(H_1[i] - H_0[i])**2 / H_0[i] for i in range(k)]))
  31.  
  32. def power(w, n, df, alpha=0.05):
  33.     """Power of Chi-square tests.
  34.  
  35.     .. math:: 1 - \\beta = 1 - F_{\\mathit{df},\\mathit{nc}}(x_{\\mathit{crit}})
  36.  
  37.     where
  38.  
  39.     - alpha is the significance level :math:`\\alpha`, i.e., the probability of
  40.       a Type I error (rejecting the Null Hypothesis :math:`H_0` when it is true)
  41.     - :math:`\\beta` is the probability of a Type II error (not rejecting
  42.       the Null Hypothesis :math:`H_0` when it is false)
  43.     - :math:`1 - \\beta` is the power, i.e., the probability of rejecting the
  44.       Null Hypothesis :math:`H_0` when it is false.
  45.     - :math:`F` is the cumulative distribution function (cdf) for the noncentral
  46.       chi-square distribution
  47.     - :math:`\\mathit{nc} = n w^2` is the noncentrality parameter
  48.     - :math:`n` is the sample size
  49.     - :math:`w` is the effect size
  50.     - :math:`x_{\\mathit{crit}}` is the :math:`\\chi^2(\\mathit{df})` critical
  51.       value for the given value of alpha, i.e.,
  52.       :math:`P(\\chi^2(\\mathit{df}) \\leq x_{\\mathit{crit}}) = 1 - \\alpha`
  53.  
  54.     :param w: Effect size
  55.     :param n: Sample size
  56.     :param df: Degrees of freedom
  57.     :param alpha: Significance level. Defaults to 0.05.
  58.     :returns: Statistical power.
  59.     """
  60.     nc = float(n) * w * w
  61.         # print("[debug] noncentrality paramter is {0:f}".format(nc))
  62.     x = chi2.ppf(1.0 - alpha, df)
  63.         # print("[debug] chi2.cdf(x={0:f},df={1:d})={2:f}".format(x,df,chi2.cdf(x,df)))
  64.     return 1.0 - ncx2.cdf(x, df, nc)
  65.  
  66. def sample_size(w, df, alpha=0.05, pwr=0.80):
  67.     """Minimum sample size required to obtain power for Chi-square tests.
  68.  
  69.     :param w: Effect size.
  70.     :param df: Degrees of freedom.
  71.     :param alpha: Significance level. Defaults to 0.05.
  72.     :param pwr: Statistical power. Defaults to 0.8.
  73.     :returns: Minimum sample size to achieve given statistical power.
  74.     """
  75.     def fn(n):
  76.         return power(w, n, df, alpha) - pwr
  77.     return fsolve(fn, 100)
  78.  
  79. def Example():
  80.     """**Example 1** excerpted from [Gunt77]_, p. 83.
  81.  
  82.     The example has two parts:
  83.  
  84.     1. Suppose that we wish to test a six-sided die for unbiasedness so that
  85.        all six :math:`p_{i,0}`'s are :math:`\\frac{1}{6}`. If side six is loaded
  86.        so that actually :math:`p_{6,1} = \\frac{1}{4}` and the other five sides
  87.        retain equal probability of occurrrence, find the power for this
  88.        alternative if the significance level is 0.05 and :math:`n=120`.
  89.     2. Then find the minimum *n* needed to make the power at the alternative
  90.        be at least 0.90.
  91.  
  92.  
  93.     *Solution*:
  94.  
  95.     1. The noncentrality parameter is :math:`\\mathit{nc} = \\frac{n}{20} = 6`,
  96.        which implies effect size is :math:`w = \\sqrt{\\frac{1}{20}} = 0.223607`
  97.           and the power is approximately 0.432876.
  98.     2. To achieve power of 0.90 we need :math:`n \\geq 329.389213`.
  99.  
  100.     .. rubric:: References
  101.  
  102.     .. [Gunt77] Guenther, William C. "Power and Sample Size for Approximate
  103.        Chi-Square Tests." *The American Statistician*, Vol. 31, No. 2
  104.        (May, 1977), pp. 83-85. www2.stat.duke.edu/~zo2/dropbox/pdf/2683047.pdf
  105.  
  106.     """
  107.         H_1 = [0.75/5]*5
  108.         H_1.append(0.25)
  109.         # print("[debug] Sum of probs of H_1 is {0:f}".format(sum(H_1)))
  110.         w = effect_size(H_1)
  111.         # print("[debug] effect size of test is {0:f}".format(w))
  112.         n = 120
  113.         df = 5
  114.         alpha = 0.05
  115.     print("1. The power of the test is {0:f}".format(power(w, n, df, alpha)))
  116.     print("2. The sample size to achieve power of 0.90 is {0:f}".format(sample_size(w, df, alpha, 0.90)[0]))
  117.         # print("[debug] The power when n=329.4 is {0:f}".format(power(w,329.4,df,alpha)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement