Advertisement
Guest User

probtest.py

a guest
Jan 31st, 2013
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.44 KB | None | 0 0
  1. #     Test whether there is a different success rate for two strategies of
  2. #     testing a hypothesis.
  3. #     The first method is to run NUM_TRIALS trials and see what the odds ratio
  4. # is between the probability of getting that if the null hypothesis is true
  5. # versus if the alternate hypothesis is true, then see if that exceeds the
  6. # SIGNIFICANCE_THRESHOLD.
  7. #     The second method is to run a series of trials, testing after each one to
  8. # see if the odds ratio exceeds the SIGNIFICANCE_THRESHOLD, stopping if the
  9. # result is significant or if the number of trials reaches NUM_TRIALS.
  10. #     My advance prediction is that the second method will be more successful
  11. # at showing significance than the first.
  12. from random import random
  13. factorial = lambda n: n <= 0 and 1 or n * factorial(n - 1)
  14. combinations = lambda n, k: factorial(n) / (factorial(k) * factorial(n - k))
  15.  
  16. def probability_of_data(data, p_hypothesis):
  17.     # Data is a list of booleans: true means the treatment worked.
  18.     # p_hypothesis is the probability a hypothesis gives to the treatment
  19.     # working.
  20.     positive_results = sum(data)
  21.     negative_results = len(data) - positive_results
  22.     # We don't care what order the data are in, only the proportion.
  23.     # I start with a probability greater than one because that reduces the
  24.     # chance of an error due to very small probabilities: the result is the
  25.     # same, the steps are just in the opposite order.
  26.     p_data = combinations(len(data), positive_results)
  27.     p_data *= p_hypothesis**positive_results
  28.     p_data *= (1.0 - p_hypothesis)**negative_results
  29.     return p_data
  30.  
  31. def odds_ratio(data, p_null, p_alternate):
  32.     # Gives the ratio of the probability that the data will be observed given
  33.     # the alternate hypotheis to the probability that the data will be observed
  34.     # given the null hypothesis.
  35.     p_data_given_null = probability_of_data(data, p_null)
  36.     p_data_given_alternate = probability_of_data(data, p_alternate)
  37.     return p_data_given_alternate / p_data_given_null
  38.  
  39. def generate_data(p_treatment_works, NUM_TRIALS):
  40.     # p_treatment_works is the actual probability the treatment works
  41.     data = []
  42.     for i in range(NUM_TRIALS):
  43.         treatment_worked = random() < p_treatment_works
  44.         data.append(treatment_worked)
  45.     return data
  46.  
  47. def test_set_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
  48.     # Return a boolean that says whether the result was significant
  49.     return odds_ratio(data, p_null, p_alternate) > SIGNIFICANCE_THRESHOLD
  50.  
  51. def test_to_max_trials_method(data, p_null, p_alternate, NUM_TRIALS, SIGNIFICANCE_THRESHOLD):
  52.     # Return a boolean that says whether the result was significant
  53.     for i in range(1, len(data)):
  54.         if odds_ratio(data[:i], p_null, p_alternate) > SIGNIFICANCE_THRESHOLD:
  55.             return True
  56.     return False
  57.  
  58. NUM_TRIALS = 25
  59. NUM_TESTS = 100
  60. SIGNIFICANCE_THRESHOLD = 5
  61. set_trials_results = []
  62. to_max_trials_results = []
  63. for i in range(NUM_TESTS):
  64.     data = generate_data(0.33, NUM_TRIALS)
  65.     set_trials_results.append(test_set_trials_method(data, 0.25, 0.5, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))
  66.     to_max_trials_results.append(test_to_max_trials_method(data, 0.25, 0.5, NUM_TRIALS, SIGNIFICANCE_THRESHOLD))
  67.  
  68. print "Of the {0} datasets, significance was found {1} times with the set trials method, and {2} times with the variable trials method.".format(NUM_TESTS, sum(set_trials_results), sum(to_max_trials_results))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement