Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import random
- import matplotlib.pyplot as plt
- import scipy.stats
def create_pop(n):
    """Build a pseudo-random population of ``n`` values.

    The values come from ``np.random.random_sample`` scaled by 1000, so they
    imitate a uniform distribution.

    Returns:
        A one-dimensional NumPy array of length ``n``.
    """
    unit_draws = np.random.random_sample(size=(n,))
    return unit_draws * 1000
def graph_distribution(dist, name):
    """Plot a histogram of ``dist`` with ``name`` as the plot title, then show it."""
    # Draw on the current axes, which is what the pyplot-level hist/title
    # helpers operate on as well.
    axes = plt.gca()
    axes.hist(dist)
    axes.set_title(name)
    plt.show()
def sample_procedure(dist, sample_size=None, sample_repeats=None, show_plot=True):
    """Repeatedly sample ``dist`` with replacement and summarise the sample means.

    For each repetition a sample of ``sample_size`` values is drawn from
    ``dist`` and its mean recorded. The function also counts how many sample
    means fall inside the 95% interval ``mu +/- z * SE`` built from the
    population mean and standard deviation of ``dist``.

    Args:
        dist: Population values to sample from (sequence or NumPy array).
        sample_size: Number of values per sample. Prompted for when ``None``.
        sample_repeats: Number of samples to draw. Prompted for when ``None``.
        show_plot: When true, display a histogram of the sample means.

    Returns:
        A ``(count, sample_repeats)`` tuple, where ``count`` is the number of
        sample means that fell inside the 95% interval.

    Raises:
        ValueError: If ``dist`` is empty, if ``sample_size`` or
            ``sample_repeats`` is below 1, or if a prompted value is not an
            integer.
    """
    if sample_size is None:
        sample_size = int(_read_line("Enter the sample size:"))
    if sample_repeats is None:
        sample_repeats = int(_read_line("Enter the number of times to repeat the experiment:"))
    if len(dist) == 0:
        raise ValueError("population distribution is empty")
    if sample_size < 1 or sample_repeats < 1:
        raise ValueError("sample size and number of repeats must both be at least 1")

    # Population parameters come from the distribution passed in, not from a
    # module-level global.
    standard_deviation_population = np.std(dist)
    mean_population = np.mean(dist)
    standard_error = standard_deviation_population / sample_size ** 0.5
    # confidence interval = mu +/- Z.95 * SE
    lower, upper = scipy.stats.norm.interval(.95, mean_population, standard_error)

    sample_means = []
    count = 0
    for _ in range(sample_repeats):
        # np.random.choice samples with replacement by default.
        sample = np.random.choice(dist, (sample_size, ))
        this_mean = np.mean(sample)
        sample_means.append(this_mean)
        # A sample mean lying within mu +/- z*SE is equivalent to the interval
        # this_mean +/- z*SE containing the population mean, so this counts
        # how often the confidence interval covers the true mean.
        if lower < this_mean < upper:
            count += 1

    if show_plot:
        print("Here's what the sample mean distribution looks like.")
        plt.hist(sample_means)
        plt.title('Sample Means')
        plt.show()
    # Single-argument print calls with %-formatting produce the same output
    # on Python 2 and Python 3.
    print("The mean of the sample means is: %s" % np.mean(sample_means))
    print("The Standard Error / The standard deviation of the sample means is: %s" % np.std(sample_means))
    print("")
    return count, sample_repeats


def _read_line(message):
    """Prompt with ``message`` and return the line the user typed."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(message)


#####################################
def main():
    """Interactive driver: build a population, then sample it until the user stops."""
    print("First create a (pseudo)-random distribution.")
    population_size = int(_read_line("Enter a population size:"))
    print("...")
    population = create_pop(population_size)
    print("Here's what the population distribution looks like.")
    graph_distribution(population, 'Population Distribution')
    print("The population mean is: %s" % np.mean(population))
    print("")
    print("The population standard deviation is: %s" % np.std(population))
    print("")
    print("For the second step, enter the size of the samples to draw --with replacement-- from the population distribution, and how many times to repeat this procedure in order to create a distribution of sample means.")
    while True:
        count, sample_repeats = sample_procedure(population)
        # Proportion of repetitions whose sample mean fell inside the 95%
        # interval; expected to be close to 0.95.
        probability = float(count) / sample_repeats
        print(probability)
        print("Perform sampling procedure again? Note that if no, then the population distribution will be lost.")
        decision = _read_line("Type y or n:")
        if decision == 'n':
            break
        print("Doing procedure again.")


# Guarded so that importing this module does not start the interactive session.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement