chisq.py

#!/usr/bin/env python
"""Chi-square Power, effect size and sample size calculations."""
from scipy.stats import chi2
from scipy.stats import ncx2
from scipy.optimize import fsolve
from math import sqrt

def effect_size(H_1, H_0=None):
    """Effect size for Chi-square tests.

    Let :math:`(X_1, X_2, \\ldots, X_k)` be a multinomial random variable with
    parameters :math:`p_1, p_2, \\ldots, p_k`. Suppose we wish to test

    .. math:: H_0: p_i = p_{i,0}, \\qquad i = 1, 2, \\ldots, k

    against :math:`H_1`: not all :math:`p`'s given by :math:`H_0`.

    Then the effect size :math:`w` is given by

    .. math:: w = \\sqrt{\\sum_{i=1}^k \\frac{(p_{i,1} - p_{i,0})^2}{p_{i,0}}}

    :param H_1: Alternative hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0.
    :param H_0: Optional Null hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0. Must be same length as H_1. Defaults to equiprobable distribution: [1.0/float(len(H_1))]*len(H_1).
    :returns: Effect size
    """
    k = len(H_1)
    # Default H_0 is equiprobable outcomes:
    if H_0 is None:
        H_0 = [1.0/float(k)]*k
    return sqrt(sum([(H_1[i] - H_0[i])**2 / H_0[i] for i in range(k)]))

def power(w, n, df, alpha=0.05):
    """Power of Chi-square tests.

    .. math:: 1 - \\beta = 1 - F_{\\mathit{df},\\mathit{nc}}(x_{\\mathit{crit}})

    where

    - alpha is the significance level :math:`\\alpha`, i.e., the probability of
      a Type I error (rejecting the Null Hypothesis :math:`H_0` when it is true)
    - :math:`\\beta` is the probability of a Type II error (not rejecting
      the Null Hypothesis :math:`H_0` when it is false)
    - :math:`1 - \\beta` is the power, i.e., the probability of rejecting the
      Null Hypothesis :math:`H_0` when it is false.
    - :math:`F` is the cumulative distribution function (cdf) for the noncentral
      chi-square distribution
    - :math:`\\mathit{nc} = n w^2` is the noncentrality parameter
    - :math:`n` is the sample size
    - :math:`w` is the effect size
    - :math:`x_{\\mathit{crit}}` is the :math:`\\chi^2(\\mathit{df})` critical
      value for the given value of alpha, i.e.,
      :math:`P(\\chi^2(\\mathit{df}) \\leq x_{\\mathit{crit}}) = 1 - \\alpha`

    :param w: Effect size
    :param n: Sample size
    :param df: Degrees of freedom
    :param alpha: Significance level. Defaults to 0.05.
    :returns: Statistical power.
    """
    nc = float(n) * w * w
        # print("[debug] noncentrality paramter is {0:f}".format(nc))
    x = chi2.ppf(1.0 - alpha, df)
        # print("[debug] chi2.cdf(x={0:f},df={1:d})={2:f}".format(x,df,chi2.cdf(x,df)))
    return 1.0 - ncx2.cdf(x, df, nc)

def sample_size(w, df, alpha=0.05, pwr=0.80):
    """Minimum sample size required to obtain power for Chi-square tests.

    :param w: Effect size.
    :param df: Degrees of freedom.
    :param alpha: Significance level. Defaults to 0.05.
    :param pwr: Statistical power. Defaults to 0.8.
    :returns: Minimum sample size to achieve given statistical power.
    """
    def fn(n):
        return power(w, n, df, alpha) - pwr
    return fsolve(fn, 100)

def Example():
    """**Example 1** excerpted from [Gunt77]_, p. 83.

    The example has two parts:

    1. Suppose that we wish to test a six-sided die for unbiasedness so that
       all six :math:`p_{i,0}`'s are :math:`\\frac{1}{6}`. If side six is loaded
       so that actually :math:`p_{6,1} = \\frac{1}{4}` and the other five sides
       retain equal probability of occurrrence, find the power for this
       alternative if the significance level is 0.05 and :math:`n=120`.
    2. Then find the minimum *n* needed to make the power at the alternative
       be at least 0.90.


    *Solution*:

    1. The noncentrality parameter is :math:`\\mathit{nc} = \\frac{n}{20} = 6`,
       which implies effect size is :math:`w = \\sqrt{\\frac{1}{20}} = 0.223607`
           and the power is approximately 0.432876.
    2. To achieve power of 0.90 we need :math:`n \\geq 329.389213`.

    .. rubric:: References

    .. [Gunt77] Guenther, William C. "Power and Sample Size for Approximate
       Chi-Square Tests." *The American Statistician*, Vol. 31, No. 2
       (May, 1977), pp. 83-85. www2.stat.duke.edu/~zo2/dropbox/pdf/2683047.pdf

    """
        H_1 = [0.75/5]*5
        H_1.append(0.25)
        # print("[debug] Sum of probs of H_1 is {0:f}".format(sum(H_1)))
        w = effect_size(H_1)
        # print("[debug] effect size of test is {0:f}".format(w))
        n = 120
        df = 5
        alpha = 0.05
    print("1. The power of the test is {0:f}".format(power(w, n, df, alpha)))
    print("2. The sample size to achieve power of 0.90 is {0:f}".format(sample_size(w, df, alpha, 0.90)[0]))
        # print("[debug] The power when n=329.4 is {0:f}".format(power(w,329.4,df,alpha)))