Advertisement
Guest User

Untitled

a guest
Jun 24th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.43 KB | None | 0 0
  1. import numpy as np
  2. from scipy.misc import logsumexp
  3. # E step
  4. def expectation(B, model):
  5. E = np.zeros(model.n, model.m) #initialize n by m array
  6. A = np.zeros(model.n) #initialize length-n vector. topic freqs
  7. normalizer = np.zeros(model.m) #initialize length-m vector.
  8. for j in range(model.m):
  9. normalizer[j] = sum(model.pi[k] * model.e[k][j] for k in range(model.n))
  10. for i in range(model.n): #for topic i
  11. for j in range(model.m): #for word j
  12. E[i][j] = B[j]*model.pi[i]*model.e[i][j]/normalizer[j]
  13.  
  14. for i in range(model.n):
  15. A[i] = np.sum(E, 1)#sum(E[i, j] for j in range(model.m)) col sum
  16.  
  17. return E, A
  18.  
  19. # M step
  20. def maximize(E, A):
  21. model = Model(len(A), len(E[0]))
  22. model.pi /= model.pi.sum()
  23. # for i in range(model.n):
  24. # model.pi[i] = A[i]/sum(A)
  25. # normalizer = logsumexp(np.log(E))#sum(E[i][k] for k in range(model.m))
  26. # for j in range(model.m):
  27. # model.e[i][j] = E[i][j]/normalizer
  28. return model
  29.  
  30.  
  31. def EM(X, epsilon,n):
  32. categories, B = np.unique(X, return_counts=True)
  33.  
  34. #B={}
  35. for word in X:
  36. B[word] = B.get(word, 0) + 1
  37.  
  38. b = []
  39. word_index = {}
  40. for w, count in B.items():
  41. b.append(count)
  42. word_index[w] = len(b) - 1
  43.  
  44. error = 10000
  45. model = Model(n, len(b))
  46. estimate = get_estimate(X, model, word_index)
  47. while error > epsilon:
  48. results = expectation(b, model)
  49. new_model = maximize(results[0], results[1])
  50. new_estimate = estimate(X, new_model, word_index)
  51. error = abs(new_estimate - estimate)
  52. model = new_model
  53. estimate = new_estimate
  54.  
  55. return model
  56.  
  57.  
  58. def get_estimate(X, model, word_index): # ???
  59. res = 1
  60. for t in range(len(X)):
  61. total = sum(model.pi[i]*model.e[i][word_index[X[t]]] for i in range(model.n))
  62. res *= total
  63. return np.log(res)
  64.  
  65.  
  66.  
  67. class Model:
  68. def __init__(self, n, m):
  69. self.pi = np.random.rand(n)
  70. self.e = np.random.rand(n, m) # entry rc = freq of word c in topic r
  71. self.normalize()
  72. self.n = n #number of topics
  73. self.m = m #number of words
  74.  
  75. def normalize(self):
  76. total = sum(self.pi)
  77. self.pi = self.pi / total
  78.  
  79. for col in range(len(self.e)):
  80. total = sum(self.e[col])
  81. self.e[col] = self.e[col] / total
  82.  
  83.  
  84.  
  # Script entry: obtain the data and run EM on it.
  # NOTE(review): `util_function`, `epsilon` and `n` are not defined anywhere in
  # the visible source — presumably supplied elsewhere; confirm before running.
  85. X, e = util_function()
  # The second value unpacked above (`e`) is not used below, and the model
  # returned by EM is discarded.
  86. EM(X, epsilon, n)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement