Advertisement
Guest User

Untitled

a guest
Apr 24th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.20 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. import random
  4. from janome.tokenizer import Tokenizer
  5.  
  6. # Janomeを使用してテキストデータを単語に分割する
  7. def wakati(text):
  8. text = text.replace('\n','') #改行を削除
  9. text = text.replace('\r','') #スペースを削除
  10. text = text.replace('「','') ##開き括弧を削除
  11. text = text.replace('」','') ##開き括弧を削除
  12. text = text.replace('(','') ##開き括弧を削除
  13. text = text.replace(')','') ##閉じ括弧を削除
  14. text = text.replace('(','') ##開き括弧を削除
  15. text = text.replace(')','') ##閉じ括弧を削除
  16. t = Tokenizer()
  17. result =t.tokenize(text, wakati=True)
  18. return result
  19.  
  20. #デフォルトの文の数は20
  21. def generate_text(num_sentence=20):
  22. filename = "sample.txt"
  23. src = open(filename, "r",encoding="utf-8").read()
  24. wordlist = wakati(src)
  25. ## src = open(filename, "r").read() に,encoding="utf-8"を追加
  26.  
  27. #マルコフ連鎖用のテーブルを作成
  28. markov = {}
  29. w1 = ""
  30. w2 = ""
  31. for word in wordlist:
  32. if w1 and w2:
  33. if (w1, w2) not in markov:
  34. markov[(w1, w2)] = []
  35. markov[(w1, w2)].append(word)
  36. w1, w2 = w2, word
  37.  
  38. #文章の自動生成
  39. count_kuten = 0 #句点「。」の数
  40. num_sentence= num_sentence
  41. sentence = ""
  42. w1, w2 = random.choice(list(markov.keys()))
  43. while count_kuten < num_sentence:
  44. tmp = random.choice(markov[(w1, w2)])
  45. sentence += tmp
  46. if(tmp=='。'):
  47. count_kuten += 1
  48. sentence += '\n' #1文ごとに改行
  49. w1, w2 = w2, tmp
  50. ## sentence += tmp # sentenceにtmpを加える
  51. ## count_kuten += 1 # count_kutenの数を1増やす
  52.  
  53. with open('takuya.txt', 'a', encoding = 'utf_8') as f:
  54. f.writelines(sentence)
  55. ##  with open('takuya.txt', 'a', encoding = 'utf_8') as f:  # dics_markov.txtを末尾追加で書き込み用で開く
  56. ##  f.writelines(sentence) # fにsentenceを書き込む
  57.  
  58. print(sentence)
  59.  
  60. if __name__ == "__main__":
  61. generate_text()
  62. ## if __name__ == "__main__":  # 外部からインポートした時に自動で実行しないようにする
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement