Advertisement
Guest User

Untitled

a guest
Apr 19th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.08 KB | None | 0 0
  1. def update_hmm(emission_matrix, unique_words, sentence):
  2. """
  3. Reads in hmm's emission matrix, unique_words, and the entire string of testdata.
  4. Returns nothing. Instead, alters the values of emission matrix by adding 0.00001 and normalizing.
  5. """
  6. EPSILON = 0.00001
  7. seen = False
  8.  
  9. # check if we have seen a word already
  10. for word in sentence:
  11. if word not in unique_words:
  12. seen = True
  13. unique_words.add(word)
  14. for tag in emission_matrix:
  15. emission_matrix[tag][word] = 0
  16.  
  17. # if we have any word that we have not seen already, do this
  18. if seen:
  19. # add 0.0001 to every tag / word in emission_matrix
  20. for tag in emission_matrix:
  21. for word in emission_matrix[tag]:
  22. emission_matrix[tag][word] += EPSILON
  23.  
  24. # normalize
  25. row_sum = 0
  26. for word in emission_matrix[tag]:
  27. row_sum += emission_matrix[tag][word]
  28. for word in emission_matrix[tag]:
  29. emission_matrix[tag][word] /= row_sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement