Advertisement
Guest User

Untitled

a guest
Apr 18th, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.34 KB | None | 0 0
  1. def trigram_viterbi(hmm, sentence):
  2. """
  3. """
  4. # Initialization
  5. viterbi = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
  6. backpointer = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
  7. #unique_tags = set(hmm.initial_distribution.keys()).union(set(hmm.transition_matrix.keys()))
  8.  
  9. unique_tags = set()
  10.  
  11. for key in hmm.initial_distribution:
  12. unique_tags.add(key)
  13. unique_tags = unique_tags.union(set(hmm.initial_distribution[key].keys()))
  14.  
  15. for key in hmm.transition_matrix:
  16. unique_tags.add(key)
  17. unique_tags = unique_tags.union(set(hmm.transition_matrix[key].keys()))
  18.  
  19. for tag1 in unique_tags:
  20. for tag2 in unique_tags:
  21. if (hmm.initial_distribution[tag1][tag2] != 0) and (hmm.emission_matrix[tag2][sentence[1]] != 0) and (hmm.emission_matrix[tag1][sentence[0]] != 0):
  22. viterbi[tag1][tag2][1] = math.log(hmm.initial_distribution[tag1][tag2]) + math.log(hmm.emission_matrix[tag2][sentence[1]]) + math.log(hmm.emission_matrix[tag1][sentence[0]])
  23. else:
  24. viterbi[tag1][tag2][1] = -1 * float('inf')
  25.  
  26.  
  27. # Dynamic programming.
  28. for t in range(2, len(sentence)):
  29. backpointer["No_Path"]["No_Path"][t] = "No_Path"
  30. for s in unique_tags:
  31. for w in unique_tags:
  32. max_value = -1 * float('inf')
  33. max_state = None
  34. for s_prime in unique_tags:
  35. val1 = viterbi[s_prime][s][t-1]
  36. val2 = -1 * float('inf')
  37. if hmm.transition_matrix[s_prime][s][w] != 0:
  38. val2 = math.log(hmm.transition_matrix[s_prime][s][w])
  39. curr_value = val1 + val2
  40. if curr_value > max_value:
  41. max_value = curr_value
  42. max_state = s_prime
  43. val3 = -1 * float('inf')
  44. if hmm.emission_matrix[w][sentence[t]] != 0:
  45. val3 = math.log(hmm.emission_matrix[w][sentence[t]])
  46. val4 = -1 * float('inf')
  47. if hmm.emission_matrix[s][sentence[t-1]] != 0:
  48. val4 = math.log(hmm.emission_matrix[s][sentence[t-1]])
  49. viterbi[s][w][t] = max_value + val3# + val4
  50.  
  51. # bp
  52. if max_state == None:
  53. backpointer[s][w][t] = "No_Path"
  54. else:
  55. backpointer[s][w][t] = max_state
  56.  
  57.  
  58. # Termination1
  59. max_value = -1 * float('inf')
  60. l_prime = None
  61. m_prime = None
  62. final_time = len(sentence) - 1
  63. for a in unique_tags:
  64. for b in unique_tags:
  65. if viterbi[a][b][final_time] > max_value:
  66. max_value = viterbi[a][b][final_time]
  67. l_prime = a
  68. m_prime = b
  69. if l_prime == None:
  70. l_prime = "No_Path"
  71. if m_prime == None:
  72. m_prime = "No_Path"
  73.  
  74. # Traceback
  75. tagged_sentence = []
  76. tagged_sentence.append((sentence[len(sentence)-1], m_prime))
  77. tagged_sentence.append((sentence[len(sentence)-2], l_prime))
  78.  
  79. for i in range(len(sentence)-3, -1, -1):
  80. curr_tag = backpointer[tagged_sentence[-1][1]][tagged_sentence[-2][1]][i+2]
  81. tagged_sentence.append((sentence[i], curr_tag))
  82. tagged_sentence.reverse()
  83. return tagged_sentence
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement