Advertisement
Guest User

Untitled

a guest
Jul 7th, 2015
205
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.22 KB | None | 0 0
  1. require 'natto'
  2. require 'twitter'
  3. require 'pp'
  4. require 'enumerator'
  5.  
  6. # ----------------形態素解析
  7. # ----------------辞書的なものの作成
  8. def parse_text(text)
  9. mecab = Natto::MeCab.new
  10. text = text.strip
  11. # 形態素解析したデータを配列に分けて突っ込む
  12. # 先頭にBEGIN、最後にENDを追加
  13. data = ["BEGIN","BEGIN"]
  14. mecab.parse(text) do |a|
  15. if a.surface != nil
  16. data << a.surface
  17. end
  18. end
  19. data << "END"
  20. # p data
  21. data.each_cons(3).each do |a|
  22. suffix = a.pop
  23. prefix = a
  24. $h[prefix] ||= []
  25. $h[prefix] << suffix
  26. end
  27. end
  28.  
  29. # ----------------マルコフ連鎖
  30. def markov()
  31. # ランダムインスタンスの生成
  32. random = Random.new
  33. # スタートは begin,beginから
  34. prefix = ["BEGIN","BEGIN"]
  35. ret = ""
  36. loop{
  37. n = $h[prefix].length
  38. prefix = [prefix[1] , $h[prefix][random.rand(0..n-1)]]
  39. ret += prefix[0] if prefix[0] != "BEGIN"
  40. if $h[prefix].last == "END"
  41. ret += prefix[1]
  42. break
  43. end
  44. }
  45. p ret
  46. return ret
  47. end
  48. # テーブル用ハッシュ
  49. $h = {}
  50.  
  51. File.open("text.txt","r") do |file|
  52. file.each do |paragraph|
  53. parse_text(paragraph)
  54. natto = Natto::MeCab.new
  55. natto.parse(paragraph) do |n|
  56. puts "#{n.surface}\t#{n.feature}"
  57. end
  58. end
  59. end
  60.  
  61. for i in 0..20
  62. markov()
  63. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement