Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def trigram_viterbi(hmm, sentence):
- """
- """
- # Initialization
- viterbi = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
- backpointer = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
- #unique_tags = set(hmm.initial_distribution.keys()).union(set(hmm.transition_matrix.keys()))
- unique_tags = set()
- for key in hmm.initial_distribution:
- unique_tags.add(key)
- unique_tags = unique_tags.union(set(hmm.initial_distribution[key].keys()))
- for key in hmm.transition_matrix:
- unique_tags.add(key)
- unique_tags = unique_tags.union(set(hmm.transition_matrix[key].keys()))
- for tag1 in unique_tags:
- for tag2 in unique_tags:
- if (hmm.initial_distribution[tag1][tag2] != 0) and (hmm.emission_matrix[tag2][sentence[1]] != 0) and (hmm.emission_matrix[tag1][sentence[0]] != 0):
- viterbi[tag1][tag2][1] = math.log(hmm.initial_distribution[tag1][tag2]) + math.log(hmm.emission_matrix[tag2][sentence[1]]) + math.log(hmm.emission_matrix[tag1][sentence[0]])
- else:
- viterbi[tag1][tag2][1] = -1 * float('inf')
- # Dynamic programming.
- for t in range(2, len(sentence)):
- backpointer["No_Path"]["No_Path"][t] = "No_Path"
- for s in unique_tags:
- for w in unique_tags:
- max_value = -1 * float('inf')
- max_state = None
- for s_prime in unique_tags:
- val1 = viterbi[s_prime][s][t-1]
- val2 = -1 * float('inf')
- if hmm.transition_matrix[s_prime][s][w] != 0:
- val2 = math.log(hmm.transition_matrix[s_prime][s][w])
- curr_value = val1 + val2
- if curr_value > max_value:
- max_value = curr_value
- max_state = s_prime
- val3 = -1 * float('inf')
- if hmm.emission_matrix[w][sentence[t]] != 0:
- val3 = math.log(hmm.emission_matrix[w][sentence[t]])
- val4 = -1 * float('inf')
- if hmm.emission_matrix[s][sentence[t-1]] != 0:
- val4 = math.log(hmm.emission_matrix[s][sentence[t-1]])
- viterbi[s][w][t] = max_value + val3# + val4
- # bp
- if max_state == None:
- backpointer[s][w][t] = "No_Path"
- else:
- backpointer[s][w][t] = max_state
- # Termination1
- max_value = -1 * float('inf')
- l_prime = None
- m_prime = None
- final_time = len(sentence) - 1
- for a in unique_tags:
- for b in unique_tags:
- if viterbi[a][b][final_time] > max_value:
- max_value = viterbi[a][b][final_time]
- l_prime = a
- m_prime = b
- if l_prime == None:
- l_prime = "No_Path"
- if m_prime == None:
- m_prime = "No_Path"
- # Traceback
- tagged_sentence = []
- tagged_sentence.append((sentence[len(sentence)-1], m_prime))
- tagged_sentence.append((sentence[len(sentence)-2], l_prime))
- for i in range(len(sentence)-3, -1, -1):
- curr_tag = backpointer[tagged_sentence[-1][1]][tagged_sentence[-2][1]][i+2]
- tagged_sentence.append((sentence[i], curr_tag))
- tagged_sentence.reverse()
- return tagged_sentence
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement