Advertisement
Guest User

Untitled

a guest
May 3rd, 2016
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.74 KB | None | 0 0
  1. from chainer import FunctionSet
  2. from chainer.functions import *
  3. from collections import Counter
  4. from chainer import Variable, FunctionSet, optimizers
  5. import numpy as np
  6. import math
  7. import time
  8.  
  9.  
  10. text = open("sample.txt").read().lower().split()#小文字に変換してる
  11. text_data = sorted(list(set(text)))
  12. vocab_size = len(text_data)
  13.  
  14. # ここを大きくして欲しいす
  15. hidden_size = 20
  16. # ------------------------
  17.  
  18. def convert_to_your_word_id(word):
  19. return text_data.index(word)
  20.  
  21. def forward(model, word_in, word_ans, h): # sentenceはstrの配列。MeCabなどの出力を想定。
  22. in_id = convert_to_your_word_id(word_in) # 単語をIDに変換。自分で適当に実装する。
  23. out_id = np.array([convert_to_your_word_id(word_ans)])
  24.  
  25. x = np.zeros((1, vocab_size), dtype=np.float32)
  26. x[0][in_id] = float(1)
  27. x = Variable(x)
  28.  
  29. h = tanh(model.w_xh(x) + model.w_hh(h)) # 隠れ層の更新
  30.  
  31. y = softmax(model.w_hy(h))
  32. accum_loss = softmax_cross_entropy(y, Variable(out_id)) # 損失の蓄積
  33. return accum_loss, h
  34.  
myh = Variable(np.zeros((1, hidden_size), dtype=np.float32))  # initial hidden state
  36.  
  37. def train(model, text):
  38. opt = optimizers.Adam() # 確率的勾配法を使用
  39. opt.setup(model) # 学習器の初期化
  40. h = Variable(np.zeros((1, hidden_size), dtype=np.float32)) # 隠れ層の初期値
  41. for i in range(100):
  42. x = text[-1]
  43. for t in text:
  44. opt.zero_grads(); # 勾配の初期化
  45. accum_loss, h = forward(model, x, t, h) # 損失の計算
  46. accum_loss.backward() # 誤差逆伝播
  47. opt.clip_grads(10) # 大きすぎる勾配を抑制
  48. opt.update() # パラメータの更新
  49. x = t
  50. myh = h
  51.  
model = FunctionSet(
    w_xh=Linear(vocab_size, hidden_size),   # input layer (one-hot) -> hidden layer
    w_hh=Linear(hidden_size, hidden_size),  # hidden layer -> hidden layer (recurrence)
    w_hy=Linear(hidden_size, vocab_size),   # hidden layer -> output layer
)
  57.  
  58.  
# Train the model and report wall-clock time taken.
start = time.time()
train(model, text)
print(time.time() - start)
  62.  
  63. def get_output(model, word_in, h): # sentenceはstrの配列。MeCabなどの出力を想定。
  64. in_id = convert_to_your_word_id(word_in) # 単語をIDに変換。自分で適当に実装する。
  65. x = np.zeros((1, vocab_size), dtype=np.float32)
  66. x[0][in_id] = float(1)
  67. x = Variable(x)
  68. h = tanh(model.w_xh(x) + model.w_hh(h)) # 隠れ層の更新
  69. y = softmax(model.w_hy(h))
  70. return y.data[0], h # 結合確率の計算結果を返す
  71.  
  72. for word in ["a", "b", "c", "d", "e", "f", "g"]:
  73. myh = Variable(np.zeros((1, hidden_size), dtype=np.float32)) # 隠れ層の
  74. output, myh = get_output(model, word, myh)
  75. print(text_data[list(output).index(np.max(output))])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement