• API
• FAQ
• Tools
• Archive
SHARE
TWEET # probtest.py a guest Jan 31st, 2013 94 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. #     Test whether there is a different success rate for two strategies of
2. #     testing a hypothesis.
3. #     The first method is to run NUM_TRIALS trials and see what the odds ratio
4. # is between the probability of getting that if the null hypothesis is true
5. # versus if the alternate hypothesis is true, then see if that exceeds the
6. # SIGNIFICANCE_THRESHOLD.
7. #     The second method is to run a series of trials, testing after each one to
8. # see if the odds ratio exceeds the SIGNIFICANCE_THRESHOLD, stopping if the
9. # result is significant or if the number of trials reaches NUM_TRIALS.
10. #     My advance prediction is that the second method will be more successful
11. # at showing significance than the first.
12. from random import random
def factorial(n):
    # Return n! for a non-negative integer n. Any n <= 0 yields 1.
    # Computed iteratively so that a large n cannot exhaust the interpreter's
    # recursion limit.
    result = 1
    while n > 0:
        result *= n
        n -= 1
    return result


def combinations(n, k):
    # Return the binomial coefficient "n choose k" for 0 <= k <= n.
    # Floor division is used because the quotient is always a whole number in
    # that range; it keeps the result an exact integer on Python 3 as well as
    # on Python 2, instead of degrading to a float.
    return factorial(n) // (factorial(k) * factorial(n - k))
15.
def probability_of_data(data, p_hypothesis):
    # Binomial probability of the observed outcomes under one hypothesis.
    # data: list of booleans, True meaning the treatment worked.
    # p_hypothesis: probability the hypothesis assigns to the treatment
    # working.
    successes = sum(data)
    failures = len(data) - successes
    # Only the counts matter, not the order of the outcomes. The binomial
    # coefficient is the leftmost factor so the running product starts large
    # before the small power terms are multiplied in.
    arrangements = combinations(len(data), successes)
    return arrangements * p_hypothesis**successes * (1.0 - p_hypothesis)**failures
30.
def odds_ratio(data, p_null, p_alternate):
    # Ratio of the probability of observing the data under the alternate
    # hypothesis to the probability of observing it under the null
    # hypothesis.
    likelihood_null, likelihood_alternate = [
        probability_of_data(data, p) for p in (p_null, p_alternate)
    ]
    return likelihood_alternate / likelihood_null
38.
def generate_data(p_treatment_works, NUM_TRIALS):
    # Simulate NUM_TRIALS trials. Each entry is True when the treatment
    # worked, which happens when a fresh random() draw falls below
    # p_treatment_works (the actual probability that the treatment works).
    return [random() < p_treatment_works for _ in range(NUM_TRIALS)]
46.
def test_set_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
    # Fixed-sample test: compute the odds ratio once over all of data and
    # return a boolean saying whether it is above SIGNIFICANCE_THRESHOLD.
    # NUM_TRIALS is accepted but not used here.
    ratio = odds_ratio(data, p_null, p_alternate)
    significant = ratio > SIGNIFICANCE_THRESHOLD
    return significant
50.
def test_to_max_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
    # Sequential test: after each trial, compute the odds ratio of the trials
    # seen so far and stop as soon as it exceeds SIGNIFICANCE_THRESHOLD.
    # Returns True if any such check is significant, False otherwise.
    # NUM_TRIALS is accepted but not used; the limit is len(data).
    #
    # The range runs to len(data) + 1 so the last check covers the whole
    # dataset. Stopping at len(data) would never include the final trial.
    for trials_so_far in range(1, len(data) + 1):
        if odds_ratio(data[:trials_so_far], p_null, p_alternate) > SIGNIFICANCE_THRESHOLD:
            return True
    return False
57.
# Number of simulated trials in each dataset.
NUM_TRIALS = 25
# Number of datasets to simulate.
NUM_TESTS = 100
# Odds ratio (alternate vs. null) that must be exceeded to call a result
# significant.
SIGNIFICANCE_THRESHOLD = 5
set_trials_results = []
to_max_trials_results = []
# Both methods are run on the same dataset each time so that their
# significance counts are directly comparable.
for i in range(NUM_TESTS):
    # Actual success rate 0.33; null hypothesis 0.25, alternate hypothesis 0.5.
    data = generate_data(0.33, NUM_TRIALS)
    set_trials_results.append(test_set_trials_method(data, 0.25, 0.5, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))
    to_max_trials_results.append(test_to_max_trials_method(data, 0.25, 0.5, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))

# Summing the boolean results counts the significant outcomes. The
# parenthesised single-argument print produces the same output on Python 2
# and also parses on Python 3.
print("Of the {0} datasets, significance was found {1} times with the set trials method, and {2} times with the variable trials method.".format(NUM_TESTS, sum(set_trials_results), sum(to_max_trials_results)))
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.
Top