Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def update_hmm(emission_matrix, unique_words, sentence, *, epsilon=0.00001):
    """Smooth the emission matrix in place for words it has not seen yet.

    Every item of ``sentence`` that is missing from ``unique_words`` is added
    to ``unique_words`` and given an emission value of 0 under every tag.
    If at least one such word was found, ``epsilon`` is added to every
    (tag, word) entry and each tag's row is rescaled so it sums to 1.
    If no new word was found, nothing is modified.

    Args:
        emission_matrix: Mapping of tag -> {word: value}; mutated in place.
        unique_words: Set of known words; mutated in place.
        sentence: Iterable of word tokens. NOTE(review): passing a plain
            ``str`` would iterate its characters, not its words -- confirm
            that callers tokenize first.
        epsilon: Smoothing constant added to every entry before
            normalizing (defaults to the original hard-coded 0.00001).

    Returns:
        None. The result is delivered by mutating the arguments.
    """
    found_new_word = False

    # Register every unseen word with a zero entry under each tag so that
    # all rows share the same vocabulary before smoothing.
    for word in sentence:
        if word not in unique_words:
            found_new_word = True
            unique_words.add(word)
            for tag in emission_matrix:
                emission_matrix[tag][word] = 0

    # Only re-smooth when the vocabulary actually grew.
    if not found_new_word:
        return

    for row in emission_matrix.values():
        # Add epsilon to every entry so no word keeps a zero value.
        for word in row:
            row[word] += epsilon

        # Rescale the row so its values sum to 1 again. The row holds at
        # least the newly added word, so with a positive epsilon the sum
        # cannot be zero.
        row_sum = sum(row.values())
        for word in row:
            row[word] /= row_sum
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement