Advertisement
Guest User

sampling + Regression to mean + Confidence Intervals (b)

a guest
Feb 7th, 2016
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.15 KB | None | 0 0
  1. import numpy as np
  2. import random
  3. import matplotlib.pyplot as plt
  4. import scipy.stats
  5.  
  6. def create_pop(n):
  7.     # to randomly create population distribution set with given population size.
  8.     # replication of uniform distribution
  9.     distrib = 1000 * np.random.random_sample((n, ))
  10.     return distrib
  11.  
  12. def graph_distribution(dist, name):
  13.     plt.hist(dist)
  14.     plt.title(name)
  15.     plt.show()
  16.  
  17. def sample_procedure(dist):
  18.     sample_size = int(raw_input("Enter the sample size:"))
  19.     sample_repeats = int(raw_input("Enter the number of times to repeat the experiment:"))
  20.     sample_means = []
  21.     # some essential data
  22.     standard_deviation_population = np.std(population)
  23.     mean_population = np.mean(population)
  24.     standard_error = standard_deviation_population / (sample_size)**0.5
  25.     #confidence interval = ยต +/- Z.95*SE
  26.     confidence_interval = scipy.stats.norm.interval(.95, mean_population, standard_error)
  27.     lower = confidence_interval[0]
  28.     upper = confidence_interval[1]
  29.     count = 0
  30.     for _ in range(sample_repeats):        
  31.         # create sample distribution set with given sample size, and calculate the sample mean
  32.         # repeat the process for given times by while loop
  33.         sample = np.random.choice(dist, (sample_size, ))
  34.         this_mean = np.mean(sample)
  35.         sample_means.append(this_mean)
  36.         #keep track of the number of times the population mean falls within the confidence intervals
  37.         if lower <mean_population < upper:
  38.             count += 1
  39.     print "Here's what the sample mean distribution looks like."
  40.    
  41.     plt.hist(sample_means)
  42.     plt.title('Sample Means')
  43.     plt.show()
  44.    
  45.     print "The mean of the sample means is:", np.mean(sample_means)
  46.     print "The Standard Error / The standard deviation of the sample means is:", np.std(sample_means)
  47.     print ""
  48.    
  49. #####################################
  50.  
  51. print "First create a (pseudo)-random distribution."
  52. population_size = int(raw_input("Enter a population size:"))
  53. print "..."
  54. population = create_pop(population_size)
  55. print "Here's what the population distribution looks like."
  56. graph_distribution(population, 'Population Distribution')
  57.  
  58. print "The population mean is:", np.mean(population)
  59. print ""
  60. print "The population standard deviation is:", np.std(population)
  61. print ""
  62. print "For the second step, enter the size of the samples to draw --with replacement-- from the population distribution, and how many times to repeat this procedure in order to create a distribution of sample means."
  63.  
  64. sample_flag = True
  65.  
  66. while sample_flag:
  67.     sample_procedure(population)
  68.     #track the number of times the population mean falls within the confidence intervals
  69.     # and get the proportion from the sample_repeats base
  70.     #I expect to get the probability close to 0.95(confidence interval)
  71.     probability = float(count) /sample_repeat
  72.     print probablilty
  73.     print "Perform sampling procedure again? Note that if no, then the population distribution will be lost."
  74.     decision = raw_input("Type y or n:")
  75.     if decision == 'n':
  76.         sample_flag = False
  77.     else:
  78.         print "Doing procedure again."
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement