SHARE
TWEET

Maximum Likelihood fitting of truncated, mixed, two populat

a guest Jan 18th, 2013 75 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import math
  2. import random
  3.  
  4. #FUNCTIONS
  5.  
  6. def getNormalizer(mu,sigma,dataRange):
  7. #make the resolution on this large.
  8.  
  9.  n=0
  10.  for count in range(dataRange[0]*100,dataRange[1]*100):
  11.   #density * volume
  12.   n+=(math.exp((-1.0/2.0)*pow(float(count)*0.01-mu,2)/pow(sigma,2)))*0.01
  13.  
  14.  return(n)
  15.  
  16.  
  17.  
  18. #MAIN
  19.  
  20. print "Demonstration of MLE disentangling two mixed, truncated, unnormalized Gaussians"
  21.  
  22. #DECLARE THE TRUE DATA PARAMETERS (ANSWERS)
  23. trueSig1=0.6
  24. trueSig2=0.8
  25. trueMu1=3.
  26. trueMu2=6.
  27. trueRatio=0.3
  28. dataSize=1000.
  29.  
  30. #This is the problem. setting both limits to [-10,20] (sampling the whole population) works brilliantly, setting it to say [1,6] and [3,10] doesn't work.
  31. range1=[-10,20]
  32. range2=[-10,20]
  33.  
  34.  
  35.  
  36.  
  37. #DECLARE THE SEARCH SPACE
  38.  #brute grid
  39. sigma1List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9]
  40. sigma2List=[0.3,0.4,0.5,0.6,0.7,0.8,0.9]
  41. mu1List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]
  42. mu2List=[1.,2.,3.,4.,5.,6.,7.,8.,9.,10.]
  43. ratioList=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
  44.  
  45. print "Generating Data..."
  46. #CREATE DATA ARRAY X
  47. x=[]
  48. #gauss1
  49. count=0
  50. while count < int(dataSize*trueRatio):
  51.  guess=random.gauss(trueMu1,trueSig1)
  52.  if guess > range1[0] and guess < range1[1]:
  53.   x.append(guess)
  54.   count+=1
  55.  
  56. #gauss2
  57. count=0
  58. while count < int(dataSize*(1.0-trueRatio)):
  59.  guess=random.gauss(trueMu2,trueSig2)
  60.  if guess > range2[0] and guess < range2[1]:
  61.   x.append(guess)
  62.   count+=1
  63.  
  64. #LOOP THROUGH PARAMETERS IN A BRUTE FORCE GRID SEARCH
  65. maxProb=-99999999999
  66. for mu1 in mu1List:
  67.  print "Sanity Counter: "+str(mu1)
  68.  for mu2 in mu2List:
  69.   for sigma1 in sigma1List:
  70.    for sigma2 in sigma2List:
  71.     for ratio in ratioList:
  72.      workingProb=0.
  73.  
  74.      n1=getNormalizer(mu1,sigma1,range1)
  75.      n2=getNormalizer(mu2,sigma2,range2)
  76.        
  77.      for item in x:
  78.       prob1=(ratio/n1)*math.exp((-1.0/2.0)*pow(item-mu1,2)/pow(sigma1,2))
  79.       prob2=(((1.0-ratio)/n2))*math.exp((-1.0/2.0)*pow(item-mu2,2)/pow(sigma2,2))
  80.       workingProb+=math.log(prob1+prob2)
  81.                  
  82.      if workingProb > maxProb:
  83.       maxProb = workingProb
  84.       maxSigma1 = sigma1
  85.       maxSigma2 = sigma2
  86.       maxMu1 = mu1
  87.       maxMu2 = mu2
  88.       maxRatio=ratio
  89.  
  90.  
  91. print "Values (true :: max)"
  92. print "Gaussian 1: ratio ("+str(trueRatio)+":"+str(maxRatio)+") // sigma ("+str(trueSig1)+":"+str(maxSigma1)+") // mu ("+str(trueMu1)+":"+str(maxMu1)+")."
  93. print "Gaussian 2: ratio ("+str(1.0-trueRatio)+":"+str(1.0-maxRatio)+") //sigma ("+str(trueSig2)+":"+str(maxSigma2)+") // mu ("+str(trueMu2)+":"+str(maxMu2)+")."
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top