import math import random from pylab import hist, show #FUNCTIONS def getNormalizerComponent(mu,sigma,dataRange): #make the resolution on this large. n=0. for count in range(dataRange[0]*100,dataRange[1]*100): #density * volume n+=(math.exp((-1.0/2.0)*pow(float(count)*0.01-mu,2)/pow(sigma,2)))*0.01 return(n) #MAIN print "Demonstration of MLE disentangling two mixed, truncated, unnormalized Gaussians" #DECLARE THE TRUE DATA PARAMETERS (ANSWERS) trueSig1=0.9 trueSig2=0.9 trueMu1=4. trueMu2=8. trueRatio=0.2 dataSize=100. range1=[-2,7] range2=[4,12] #DECLARE THE SEARCH SPACE #brute grid sigma1List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9] sigma2List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9] mu1List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.] mu2List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.] ratioList=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] print "Generating Data..." #CREATE DATA ARRAY X x=[] #gauss1 count=0 while count < int(dataSize*trueRatio): guess=random.gauss(trueMu1,trueSig1) if guess > range1[0] and guess < range1[1]: x.append(guess) count+=1 #gauss2 count=0 while count < int(dataSize*(1.0-trueRatio)): guess=random.gauss(trueMu2,trueSig2) if guess > range2[0] and guess < range2[1]: x.append(guess) count+=1 #hist(x) #show() o=open("outfile.csv","w") #LOOP THROUGH PARAMETERS IN A BRUTE FORCE GRID SEARCH maxProb=-99999999999 for mu1 in mu1List: print "Sanity Counter: "+str(mu1) for mu2 in mu2List: for sigma1 in sigma1List: for sigma2 in sigma2List: for ratio in ratioList: workingProb=0. n1=getNormalizerComponent(mu1,sigma1,range1) n2=getNormalizerComponent(mu2,sigma2,range2) for item in x: prob1=0.; prob2=0.; if range1[0] < item < range1[1]: prob1=(ratio)*(1./n1)*math.exp((-1.0/2.0)*pow(item-mu1,2)/pow(sigma1,2)) if range2[0] < item < range2[1]: prob2=(1.-ratio)*(1./n2)*math.exp((-1.0/2.0)*pow(item-mu2,2)/pow(sigma2,2)) workingProb+=math.log((prob1+prob2)) o.write(str(mu1)+","+str(mu2)+","+str(sigma1)+","+str(sigma2)+","+str(ratio)+","+str(workingProb)+"\n") if workingProb > maxProb: maxProb = workingProb maxSigma1 = sigma1 maxSigma2 = sigma2 maxMu1 = mu1 maxMu2 = mu2 maxRatio=ratio o.close() print "Values (true :: max)" print "Gaussian 1: ratio ("+str(trueRatio)+":"+str(maxRatio)+") // sigma ("+str(trueSig1)+":"+str(maxSigma1)+") // mu ("+str(trueMu1)+":"+str(maxMu1)+")." print "Gaussian 2: ratio ("+str(1.0-trueRatio)+":"+str(1.0-maxRatio)+") //sigma ("+str(trueSig2)+":"+str(maxSigma2)+") // mu ("+str(trueMu2)+":"+str(maxMu2)+")."