Maximum Likelihood fitting of truncated, mixed, two populat

a guest
Jan 18th, 2013
90
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import math
2. import random
3.
4. #FUNCTIONS
5.
6. def getNormalizer(mu,sigma,dataRange):
7. #make the resolution on this large.
8.
9.  n=0
10.  for count in range(dataRange[0]*100,dataRange[1]*100):
11.   #density * volume
12.   n+=(math.exp((-1.0/2.0)*pow(float(count)*0.01-mu,2)/pow(sigma,2)))*0.01
13.
14.  return(n)
15.
16.
17.
18. #MAIN
19.
20. print "Demonstration of MLE disentangling two mixed, truncated, unnormalized Gaussians"
21.
22. #DECLARE THE TRUE DATA PARAMETERS (ANSWERS)
23. trueSig1=0.6
24. trueSig2=0.8
25. trueMu1=3.
26. trueMu2=6.
27. trueRatio=0.3
28. dataSize=1000.
29.
30. #This is the problem. setting both limits to [-10,20] (sampling the whole population) works brilliantly, setting it to say [1,6] and [3,10] doesn't work.
31. range1=[-10,20]
32. range2=[-10,20]
33.
34.
35.
36.
37. #DECLARE THE SEARCH SPACE
38.  #brute grid
39. sigma1List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9]
40. sigma2List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9]
41. mu1List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]
42. mu2List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]
43. ratioList=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
44.
45. print "Generating Data..."
46. #CREATE DATA ARRAY X
47. x=[]
48. #gauss1
49. count=0
50. while count < int(dataSize*trueRatio):
51.  guess=random.gauss(trueMu1,trueSig1)
52.  if guess > range1[0] and guess < range1[1]:
53.   x.append(guess)
54.   count+=1
55.
56. #gauss2
57. count=0
58. while count < int(dataSize*(1.0-trueRatio)):
59.  guess=random.gauss(trueMu2,trueSig2)
60.  if guess > range2[0] and guess < range2[1]:
61.   x.append(guess)
62.   count+=1
63.
64. #LOOP THROUGH PARAMETERS IN A BRUTE FORCE GRID SEARCH
65. maxProb=-99999999999
66. for mu1 in mu1List:
67.  print "Sanity Counter: "+str(mu1)
68.  for mu2 in mu2List:
69.   for sigma1 in sigma1List:
70.    for sigma2 in sigma2List:
71.     for ratio in ratioList:
72.      workingProb=0.
73.
74.      n1=getNormalizer(mu1,sigma1,range1)
75.      n2=getNormalizer(mu2,sigma2,range2)
76.
77.      for item in x:
78.       prob1=(ratio/n1)*math.exp((-1.0/2.0)*pow(item-mu1,2)/pow(sigma1,2))
79.       prob2=(((1.0-ratio)/n2))*math.exp((-1.0/2.0)*pow(item-mu2,2)/pow(sigma2,2))
80.       workingProb+=math.log(prob1+prob2)
81.
82.      if workingProb > maxProb:
83.       maxProb = workingProb
84.       maxSigma1 = sigma1
85.       maxSigma2 = sigma2
86.       maxMu1 = mu1
87.       maxMu2 = mu2
88.       maxRatio=ratio
89.
90.
91. print "Values (true :: max)"
92. print "Gaussian 1: ratio ("+str(trueRatio)+":"+str(maxRatio)+") // sigma ("+str(trueSig1)+":"+str(maxSigma1)+") // mu ("+str(trueMu1)+":"+str(maxMu1)+")."
93. print "Gaussian 2: ratio ("+str(1.0-trueRatio)+":"+str(1.0-maxRatio)+") //sigma ("+str(trueSig2)+":"+str(maxSigma2)+") // mu ("+str(trueMu2)+":"+str(maxMu2)+")."
RAW Paste Data