Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import division
- import random
- import math
def generateBirthdayCounts(numberBirthdays):
    """Draw random birthdays and tally how many fall on each day of the year.

    Each birthday is drawn with ``random.randint(1, 365)``.

    Args:
        numberBirthdays: How many birthdays to draw. Zero or a negative
            value draws nothing and yields all-zero counts.

    Returns:
        A list of 365 integers; element ``d - 1`` is the number of drawn
        birthdays that landed on day ``d`` (days are numbered 1..365).
    """
    dayCounts = [0] * 365
    # Tally while drawing: one pass over the draws instead of re-scanning a
    # list of birthdays once per calendar day.
    for _ in range(numberBirthdays):
        dayCounts[random.randint(1, 365) - 1] += 1
    return dayCounts
def optimumBirthdayChoice(dayCounts, numberBirthdays):
    """Return the fewest days needed to cover at least half of the birthdays.

    Days are picked greedily, largest count first, until the running total
    reaches ``ceil(numberBirthdays * 0.5)``.

    Args:
        dayCounts: Number of birthdays on each day. The list is not modified.
        numberBirthdays: Total number of birthdays the counts describe.

    Returns:
        The number of days picked. This is 0 when the target is 0.

    Raises:
        ValueError: If the counts sum to less than the target, so that no
            selection of days can reach it.
    """
    k = math.ceil(numberBirthdays * 0.5)
    birthdaySum = 0
    numberDays = 0
    # sorted() returns a new list, so the caller's counts are left intact.
    for count in sorted(dayCounts, reverse=True):
        if birthdaySum >= k:
            break
        birthdaySum += count
        numberDays += 1
    if birthdaySum < k:
        # Without this check an unreachable target would never terminate.
        raise ValueError(
            "dayCounts sums to " + str(birthdaySum)
            + ", which cannot cover the target of " + str(k) + " birthdays"
        )
    return numberDays
def numberDaysSample(numberBirthdays, sampleSize):
    """Repeat the simulation and collect the days required in each run.

    Every run generates fresh day counts with ``generateBirthdayCounts`` and
    passes them to ``optimumBirthdayChoice``.

    Args:
        numberBirthdays: Number of birthdays generated in each run.
        sampleSize: Number of runs to perform.

    Returns:
        A list with one "days required" value per run, in run order.
    """
    return [
        optimumBirthdayChoice(
            generateBirthdayCounts(numberBirthdays), numberBirthdays
        )
        for _ in range(sampleSize)
    ]
def daysRequiredStatistics(numberBirthdays, sampleSize):
    """Estimate the mean and variance of the number of days required.

    The samples come from ``numberDaysSample``. The variance is the
    population form: the mean of the squares minus the square of the mean,
    both divided by ``sampleSize``.

    Args:
        numberBirthdays: Number of birthdays generated in each run.
        sampleSize: Number of runs to average over.

    Returns:
        A two-element list ``[mean, variance]``.
    """
    samples = numberDaysSample(numberBirthdays, sampleSize)
    meanDays = sum(samples) / sampleSize
    meanOfSquares = sum(x * x for x in samples) / sampleSize
    return [meanDays, meanOfSquares - (meanDays ** 2)]
def makeConfidenceInterval(mean, variance, sampleSize, zScore=1.96):
    """Build a normal-approximation confidence interval around a sample mean.

    The interval is ``mean +/- zScore * sqrt(variance / sampleSize)``.

    Args:
        mean: Sample mean.
        variance: Variance of the individual samples.
        sampleSize: Number of samples the mean was computed from.
        zScore: Critical value of the standard normal distribution. The
            default of 1.96 gives a 95% interval.

    Returns:
        A two-element list ``[intervalMin, intervalMax]``.

    Raises:
        ZeroDivisionError: If ``sampleSize`` is 0.
    """
    # Compute the half-width once; both bounds share it.
    halfWidth = zScore * ((variance / sampleSize) ** 0.5)
    return [mean - halfWidth, mean + halfWidth]
# Simulation parameters: birthdays drawn per run, and number of runs.
numberBirthdays = 2000
sampleSize = 1000

# Run the simulation only when this file is executed as a script, so that
# importing the module does not trigger the simulation or print anything.
if __name__ == "__main__":
    birthdayStatistics = daysRequiredStatistics(numberBirthdays, sampleSize)
    mean = birthdayStatistics[0]
    variance = birthdayStatistics[1]
    confInt = makeConfidenceInterval(mean, variance, sampleSize)
    print("mean days is " + str(mean))
    print("variance days is " + str(variance))
    print("95% confidence interval is " + str(confInt))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement