Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'natto'
- require 'twitter'
- require 'pp'
- require 'enumerator'
# ---------------- Morphological analysis
# ---------------- Building the dictionary-like transition table
#
# Tokenizes +text+ with MeCab and records every run of three consecutive
# tokens in the global table $h as: [first, second] => [third, ...].
# The token stream is padded with two "BEGIN" markers at the front and one
# "END" marker at the back so sentence boundaries become part of the chain.
#
# text  - String to analyze; surrounding whitespace is stripped first.
# mecab - analyzer whose #parse yields nodes responding to #surface.
#         Defaults to a fresh Natto::MeCab, as before; pass a shared
#         instance to avoid constructing one per call.
#
# Blank input is ignored: it would otherwise record ["BEGIN", "BEGIN"] =>
# "END", i.e. an empty sentence.
def parse_text(text, mecab = Natto::MeCab.new)
  text = text.strip
  return if text.empty?

  # Collect the surface forms, with BEGIN markers first and END last.
  data = ["BEGIN", "BEGIN"]
  mecab.parse(text) do |node|
    surface = node.surface
    # Nodes without a surface form carry no token text to record.
    next if surface.nil? || surface.empty?

    data << surface
  end
  data << "END"

  data.each_cons(3) do |*prefix, suffix|
    ($h[prefix] ||= []) << suffix
  end
end
# ---------------- Markov chain
#
# Generates one sentence by walking the transition table in the global $h,
# which maps a two-token prefix to the tokens observed after it.
#
# The walk starts at ["BEGIN", "BEGIN"], picks a random recorded suffix for
# the current prefix, appends it to the sentence and shifts the prefix by
# one token. It stops when the picked suffix is the "END" marker, or when
# the current prefix has no recorded suffixes (for example when the table
# is empty), instead of failing on a nil lookup.
#
# Prints the sentence with +p+ and returns it as a String.
def markov()
  # The walk always starts from the doubled BEGIN marker.
  prefix = ["BEGIN", "BEGIN"]
  ret = +""
  loop do
    candidates = $h[prefix]
    break if candidates.nil? || candidates.empty?

    suffix = candidates.sample
    # Terminate on the suffix actually chosen, not on what happens to be
    # stored last for this prefix.
    break if suffix == "END"

    ret << suffix
    prefix = [prefix[1], suffix]
  end
  p ret
  ret
end
# Global transition table: two-token prefix (Array) => Array of tokens that
# were observed right after it. Filled by parse_text, read by markov.
$h = {}

# A single analyzer is reused for the per-line debug dump instead of
# constructing a new one for every line of the input file.
natto = Natto::MeCab.new

# Feed every line of text.txt into the table and print each token's surface
# form and feature string, tab-separated.
File.foreach("text.txt") do |paragraph|
  parse_text(paragraph)
  natto.parse(paragraph) do |n|
    puts "#{n.surface}\t#{n.feature}"
  end
end

# Generate and print 21 sentences (the original range 0..20 is inclusive).
21.times { markov() }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement