Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Let X be multivariate hypergeometrically distributed with s
- # categories having assigned values x_1,...x_s having f_1,...f_s
- # instances in the urn. m is the sum of all the f_i.
- # Let Y be the sum of of n observations from X. Y is discrete random
- # variable of s' values y_1,...y_s' with frequency g_1,...g_s'.
- # Y will range from n*x_0 to n*x_s, and s' is given by
- # | Outer Sum from 1..n of the set { x_0,...,x_s } |
- from __future__ import division
- import sys
- # Factorial of N
- def factorial( N ):
- if N == 0:
- return 1.0
- else:
- r = 1.0
- for i in range( 2, N+1 ):
- r *= i
- return r
- # Number of combinations of r items taken from n
- def combinations( n, r ):
- nCr = 1
- if (r*2) > n:
- r = n-r
- for i in range( r-1, -1, -1 ):
- nCr = ( nCr * ( n - i ) ) // ( r - i )
- return nCr
- # Enumerate probability mass pieces from the sum-of-multivariate-hypergeometrics distribution
- # Y, g, s, m, n, f, x are described at the top
- # i is the number of the category from X that we are processing
- # prob[Y] is the probability of observing sum Y
- # free is the number of observations (out of n) that remain to be used
- # maxfree[i] is the maximum allowed value of free at point i
- # k is the number of observations to use from category i
- def enumerate_mhyper_sum( i, prob, free, maxfree, Y, g, s, m, n, f, x ):
- if free == 0:
- if Y not in prob: prob[Y] = 0
- prob[Y] += g
- else:
- for k in range( max( 0, free-maxfree[i+1] ), min( f[i], free ) + 1 ):
- enumerate_mhyper_sum( i+1, prob, free-k, maxfree, Y+(x[i]*k), g*combinations(f[i],k), s, m, n, f, x )
- # Construct the sum-of-multivariate-hypergeometrics distribution
- # Return a discrete distribution such that values[i] has a frequency
- # of mass[i]
- def construct_mhyper_sum( x, f, n ):
- s = len(x)
- m = sum(f)
- prob = dict()
- maxfree = list( f )
- for i in range( s-2, -1, -1 ):
- maxfree[i] += maxfree[i+1]
- maxfree += [ 0 ]
- enumerate_mhyper_sum( 0, prob, n, maxfree, 0, 1, s, m, n, f, x )
- values = []
- mass = []
- for k in sorted( prob.keys() ):
- values.append( k )
- mass.append( prob[k] / combinations(m,n) )
- return ( values, mass )
- n = int(sys.argv[4])
- x = [ -1, 0, 1 ]
- f = [ int(sys.argv[1]),int(sys.argv[2]) , int(sys.argv[3]) ]
- t = int(sys.argv[5])
- #print f
- (values,mass) = construct_mhyper_sum( x, f, n )
- #for (v,d) in zip(values,mass):
- # print v, d
- #print "Probability Mass Sums to: %f" % sum(mass)
- #print "Population Mean (expected sum of %d observations) is: %f" % (n, sum( v*d for (v,d) in zip(values,mass) ) )
- pval = 0
- #print t #debug
- for num in range(len(values)):
- if t == values[num]:
- if t >= 0:
- for calc in range( num, len(values) ):
- # print mass[calc] #debug
- pval = pval + mass[calc]
- else:
- for calc in range( 0, num+1 ):
- pval = pval + mass[calc]
- #print mass[calc] #debug
- print "1-tailed: ", pval
- print "2-tailed: ", pval*2
- #from matplotlib import pylab as p
- #p.plot( vals, density )
- #input()
Add Comment
Please, Sign In to add comment