Advertisement
Guest User

Untitled

a guest
Feb 14th, 2016
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.94 KB | None | 0 0
  1. class CorpusModel m where
  2. tags :: m -> [Tag]
  3. tag_name :: m -> Tag -> String
  4. tag_count_u :: m -> Tag -> Int
  5. tag_count_b :: m -> (Tag, Tag) -> Int
  6. tag_count_t :: m -> (Tag, Tag, Tag) -> Int
  7. word_tag_count :: m -> Word -> Tag -> Int
  8.  
  9. viterbi :: CorpusModel m => m -> [Word] -> [Tag]
  10. viterbi m s = let -- list all possible ends tag pairs, and find the maximum one.
  11. candidates = [(taccum (len+1) t1 t2 TagStop, (t1, t2)) | t1 <- validtags, t2 <- validtags]
  12. (_, (t1, t2)) = maximumBy value_compare candidates
  13. -- utility step function that extract the previous tag
  14. -- from tag pair (t1,t2) at step n.
  15. step (n,t1,t2) = fromJust $ snd $ pi_cached !# n !# (t1,t2)
  16. -- repeat the above step utils the full path of tags are extracted,
  17. -- note that this path is reversed.
  18. path = t2 : t1 : map step (zip3 (reverse [3..len]) (tail path) path)
  19. in reverse path
  20. where
  21. validtags = tags m
  22. len = length s
  23. min_tag = TagStart
  24. max_tag = maximum validtags
  25. tag_range = ((min_tag,min_tag),(max_tag,max_tag))
  26. sentence = listArray (1,len) s
  27.  
  28. -- word probability
  29. wprob n = let cnt = word_tag_count m (sentence !# n)
  30. in \t -> (fromIntegral (cnt t) / fromIntegral (tag_count_u m t)) :: Double
  31.  
  32. -- one-step transition probability
  33. tprob t1 t2 t3 = (fromIntegral (tag_count_t m (t1,t2,t3)) / fromIntegral (tag_count_b m (t1,t2))):: Double
  34. -- accumulated transition probability
  35. taccum 1 t1 t2 t3 = tprob t1 t2 t3
  36. taccum n t1 t2 t3 = get_value (pi_cached !# (n-1) !# (t1, t2)) * tprob t1 t2 t3
  37.  
  38. pi :: Int -> (Tag,Tag) -> (Double, Maybe Tag)
  39. pi n (t2,t3) | t3 `elem` [TagStart, TagStop] = (0, Nothing)
  40. | n == 1 && t2 /= TagStart = (0, Nothing)
  41. | n > 1 && t2 `elem` [TagStart, TagStop] = (0, Nothing)
  42. -- (n = 1 ==> t2 = TagStart) && (n > 1 ==> t2 /= TagStart,TagStop)
  43. | otherwise = let wprobn = wprob n
  44. in if n <= 2 then
  45. (taccum n TagStart t2 t3 * wprobn t3, Just TagStart)
  46. else
  47. let candidates = [(taccum n t1 t2 t3 * wprobn t3, Just t1) | t1 <- validtags]
  48. in maximumBy value_compare candidates
  49. -- cached pi values
  50. -- by the lazy semantics, each value is computed only when it is necessary and always computed only once.
  51. pi_cached :: Array Int (Array (Tag,Tag) (Double, Maybe Tag))
  52. pi_cached = listArray (1,len)
  53. ( flip map [1..len] (\n ->
  54. listArray tag_range $ map (pi n) (range tag_range)))
  55.  
  56. get_value (v, _) = v
  57. value_compare a b = compare (get_value a) (get_value b)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement