Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from scipy.misc import logsumexp
# E step
def expectation(B, model):
    """Compute the expected topic/word counts under the current model.

    The responsibility of topic ``i`` for word ``j`` is
    ``pi[i] * e[i][j] / sum_k(pi[k] * e[k][j])``; it is scaled by the
    observed count ``B[j]`` of that word.

    Args:
        B: length-m sequence, ``B[j]`` is the number of occurrences of
            word ``j``.
        model: object exposing ``pi`` (length-n topic weights) and ``e``
            (n-by-m array, ``e[i][j]`` is the weight of word ``j`` in
            topic ``i``).

    Returns:
        A tuple ``(E, A)`` where ``E`` is the n-by-m array of expected
        counts and ``A`` is the length-n vector of per-topic totals
        (the row sums of ``E``).
    """
    counts = np.asarray(B, dtype=float)
    pi = np.asarray(model.pi, dtype=float)
    e = np.asarray(model.e, dtype=float)

    # joint[i][j] = pi[i] * e[i][j]; summing it over the topic axis gives
    # the per-word normalizer.
    joint = pi[:, np.newaxis] * e
    normalizer = joint.sum(axis=0)

    # counts and normalizer are length-m, so both broadcast across rows.
    E = counts * joint / normalizer
    # One total per topic: sum each row over its words.
    A = E.sum(axis=1)
    return E, A
# M step
def maximize(E, A):
    """Build the re-estimated model from the E-step expected counts.

    Args:
        E: n-by-m array of expected counts (topic ``i``, word ``j``).
        A: length-n vector of per-topic totals.

    Returns:
        A ``Model`` whose ``pi`` is ``A`` normalised to sum to 1 and whose
        ``e`` is ``E`` with every row normalised to sum to 1.
    """
    expected = np.asarray(E, dtype=float)
    totals = np.asarray(A, dtype=float)

    # Model() draws random parameters; both are overwritten below so the
    # returned model depends only on the E-step statistics.
    model = Model(len(totals), expected.shape[1])
    model.pi = totals / totals.sum()
    # Each topic's word distribution is its row of expected counts divided
    # by that row's total.
    model.e = expected / expected.sum(axis=1, keepdims=True)
    return model
def EM(X, epsilon, n, max_iter=1000):
    """Fit an n-topic mixture model to the observations in ``X`` with EM.

    Args:
        X: sequence of observed words; the values must be sortable so
            ``np.unique`` can count them.
        epsilon: iteration stops once the log-likelihood estimate changes
            by no more than this amount between two iterations.
        n: number of topics.
        max_iter: upper bound on the number of EM iterations, so the loop
            terminates even if the estimate never settles.

    Returns:
        The ``Model`` produced by the last iteration.
    """
    # counts[j] is the number of occurrences of categories[j] in X.
    categories, counts = np.unique(X, return_counts=True)
    # tolist() yields plain Python scalars, usable as dict keys for X[t].
    word_index = {word: col for col, word in enumerate(categories.tolist())}

    model = Model(n, len(counts))
    estimate = get_estimate(X, model, word_index)

    # Start above any epsilon so at least one iteration runs.
    error = float("inf")
    iterations = 0
    while error > epsilon and iterations < max_iter:
        E, A = expectation(counts, model)
        new_model = maximize(E, A)
        new_estimate = get_estimate(X, new_model, word_index)
        error = abs(new_estimate - estimate)
        model = new_model
        estimate = new_estimate
        iterations += 1
    return model
def get_estimate(X, model, word_index):
    """Return the log-likelihood of the observations ``X`` under ``model``.

    Each observation contributes ``log(sum_i pi[i] * e[i][j])`` where ``j``
    is the column of that word.

    Args:
        X: sequence of observed words.
        model: object exposing ``pi`` (length-n topic weights) and ``e``
            (n-by-m array of per-topic word weights).
        word_index: mapping from each word in ``X`` to its column in
            ``model.e``.

    Returns:
        The summed log-probability as a numpy float; ``0.0`` for empty
        ``X``.

    Raises:
        KeyError: if a word in ``X`` is missing from ``word_index``.
    """
    pi = np.asarray(model.pi, dtype=float)
    e = np.asarray(model.e, dtype=float)

    # mixture[j] = sum_i pi[i] * e[i][j], the marginal weight of word j.
    mixture = np.dot(pi, e)
    columns = np.asarray([word_index[word] for word in X], dtype=int)

    # Sum the logs instead of taking the log of the product: the product of
    # many probabilities underflows to 0 and would yield -inf.
    return np.sum(np.log(mixture[columns]))
class Model:
    """Randomly initialised mixture model with ``n`` topics over ``m`` words.

    After construction ``pi`` sums to 1 and every row of ``e`` sums to 1.
    """

    def __init__(self, n, m):
        """Draw random parameters for ``n`` topics and ``m`` words."""
        self.n = n  # number of topics
        self.m = m  # number of words
        self.pi = np.random.rand(n)  # length-n topic weights
        self.e = np.random.rand(n, m)  # entry rc = freq of word c in topic r
        self.normalize()

    def normalize(self):
        """Rescale ``pi`` to sum to 1 and each row of ``e`` to sum to 1."""
        self.pi = self.pi / self.pi.sum()
        # keepdims keeps the row totals as an (n, 1) column so the division
        # broadcasts across the words of each topic.
        self.e = self.e / self.e.sum(axis=1, keepdims=True)
- # Script entry: take X from util_function() and run EM on it.
- # NOTE(review): util_function, epsilon and n are not defined in the visible
- # part of this file, and the second value unpacked here (e) is never used;
- # confirm where these names are meant to come from.
- X, e = util_function()
- EM(X, epsilon, n)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement