# probtest.py

#     Test whether there is a different success rate for two strategies of
#     testing a hypothesis.
#     The first method is to run NUM_TRIALS trials and see what the odds ratio
# is between the probability of getting that if the null hypothesis is true
# versus if the alternate hypothesis is true, then see if that exceeds the
# SIGNIFICANCE_THRESHOLD.
#     The second method is to run a series of trials, testing after each one to
# see if the odds ratio exceeds the SIGNIFICANCE_THRESHOLD, stopping if the
# result is significant or if the number of trials reaches NUM_TRIALS.
#     My advance prediction is that the second method will be more successful
# at showing significance than the first.
from random import random
def factorial(n):
    """Return n! for a non-negative integer n.

    Any n <= 0 yields 1, so callers may pass zero (or a negative count)
    without special-casing it.
    """
    product = 1
    # Iterate rather than recurse so a large n cannot hit the recursion limit.
    while n > 0:
        product *= n
        n -= 1
    return product


def combinations(n, k):
    """Return the number of ways to choose k items out of n (n choose k).

    Floor division keeps the result an exact integer on every Python
    version; true division would produce a float and lose precision once
    the count grows beyond what a float can represent exactly.
    """
    return factorial(n) // (factorial(k) * factorial(n - k))

def probability_of_data(data, p_hypothesis):
    """Return the binomial probability of the observed outcomes.

    data is a list of booleans, one per trial, where True means the
    treatment worked.  p_hypothesis is the probability that the hypothesis
    under consideration assigns to the treatment working on a single trial.
    """
    trials = len(data)
    successes = sum(data)
    failures = trials - successes
    # Only the proportion of successes matters, not their order, so the
    # binomial coefficient accounts for every ordering with the same tally.
    # It is the leftmost factor so the running product begins at a value of
    # at least one before the (possibly tiny) power terms are multiplied in.
    return (
        combinations(trials, successes)
        * p_hypothesis**successes
        * (1.0 - p_hypothesis)**failures
    )

def odds_ratio(data, p_null, p_alternate):
    """Return how much more probable the data is under the alternate hypothesis.

    The result is the probability of observing data given the alternate
    hypothesis divided by the probability of observing it given the null
    hypothesis.  p_null and p_alternate are the per-trial success
    probabilities each hypothesis assumes.
    """
    likelihood_alternate = probability_of_data(data, p_alternate)
    likelihood_null = probability_of_data(data, p_null)
    return likelihood_alternate / likelihood_null

def generate_data(p_treatment_works, NUM_TRIALS):
    """Simulate NUM_TRIALS independent trials of the treatment.

    p_treatment_works is the actual probability that the treatment works on
    any one trial.  Returns a list of booleans, True where it worked.
    """
    # One uniform draw per trial; the trial succeeds when the draw falls
    # below the true success probability.
    return [random() < p_treatment_works for _ in range(NUM_TRIALS)]

def test_set_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
    """Return True if the complete dataset shows a significant odds ratio.

    The odds ratio of the alternate hypothesis over the null hypothesis is
    computed once, on all of data, and counts as significant when it is
    strictly greater than SIGNIFICANCE_THRESHOLD.  NUM_TRIALS is accepted
    but not consulted; every element of data is used.
    """
    ratio = odds_ratio(data, p_null, p_alternate)
    return ratio > SIGNIFICANCE_THRESHOLD

def test_to_max_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
    """Return True if any prefix of data shows a significant odds ratio.

    The odds ratio of the alternate hypothesis over the null hypothesis is
    recomputed after each successive trial, and checking stops at the first
    prefix whose ratio is strictly greater than SIGNIFICANCE_THRESHOLD.
    Returns False when no prefix, including the complete dataset, is
    significant (and therefore also for empty data).  NUM_TRIALS is accepted
    but not consulted; the number of trials is capped by len(data).
    """
    # The upper bound is len(data) + 1 so that the final prefix is the
    # complete dataset; stopping at len(data) would never test the result
    # of the last trial.
    return any(
        odds_ratio(data[:i], p_null, p_alternate) > SIGNIFICANCE_THRESHOLD
        for i in range(1, len(data) + 1)
    )

# Simulation parameters.
NUM_TRIALS = 25  # trials generated for each simulated dataset
NUM_TESTS = 100  # number of simulated datasets
SIGNIFICANCE_THRESHOLD = 5  # odds ratio that must be exceeded to be significant
P_ACTUAL = 0.33  # true per-trial success probability used to generate data
P_NULL = 0.25  # per-trial success probability under the null hypothesis
P_ALTERNATE = 0.5  # per-trial success probability under the alternate hypothesis

# One boolean per dataset for each method: True when it found significance.
set_trials_results = []
to_max_trials_results = []
for i in range(NUM_TESTS):
    # Both methods judge the same dataset so their outcomes are comparable.
    data = generate_data(P_ACTUAL, NUM_TRIALS)
    set_trials_results.append(test_set_trials_method(
        data, P_NULL, P_ALTERNATE, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))
    to_max_trials_results.append(test_to_max_trials_method(
        data, P_NULL, P_ALTERNATE, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))

# A parenthesised single-argument print behaves the same as a Python 2 print
# statement and as the Python 3 print function.
print((
    "Of the {0} datasets, significance was found {1} times with the set "
    "trials method, and {2} times with the variable trials method."
).format(NUM_TESTS, sum(set_trials_results), sum(to_max_trials_results)))