from chainer import Variable, FunctionSet, optimizers
from chainer.functions import *  # Linear, tanh, softmax, softmax_cross_entropy (old Chainer v1 API)
import numpy as np
import time
text = open("sample.txt").read().lower().split()  # lowercase, then split on whitespace
text_data = sorted(list(set(text)))  # sorted vocabulary
vocab_size = len(text_data)
# please make this larger
hidden_size = 20
# ------------------------
def convert_to_your_word_id(word):
    return text_data.index(word)  # naive linear scan over the vocabulary
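
# A faster alternative (a sketch; `word_to_id` and the function name below are
# illustrative, not from the original paste): precompute a dict so each lookup
# is O(1) instead of scanning the vocabulary list on every call.
word_to_id = {w: i for i, w in enumerate(text_data)}

def convert_to_your_word_id_fast(word):
    return word_to_id[word]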
def forward(model, word_in, word_ans, h):  # word_in/word_ans are words (str), e.g. tokens from MeCab output
    in_id = convert_to_your_word_id(word_in)  # convert the word to its ID
    out_id = np.array([convert_to_your_word_id(word_ans)], dtype=np.int32)  # label must be int32
    x = np.zeros((1, vocab_size), dtype=np.float32)
    x[0, in_id] = 1.0  # one-hot input vector
    x = Variable(x)
    h = tanh(model.w_xh(x) + model.w_hh(h))  # update the hidden state
    u = model.w_hy(h)  # output logits; softmax_cross_entropy applies softmax itself
    loss = softmax_cross_entropy(u, Variable(out_id))  # loss for this step
    return loss, h
myh = Variable(np.zeros((1, hidden_size), dtype=np.float32))  # initial hidden state
def train(model, text):
    opt = optimizers.Adam()  # Adam optimizer
    opt.setup(model)  # attach the model's parameters
    h = Variable(np.zeros((1, hidden_size), dtype=np.float32))  # initial hidden state
    for epoch in range(100):
        x = text[-1]  # predict the first word from the last one
        for t in text:
            opt.zero_grads()  # reset gradients
            loss, h = forward(model, x, t, h)  # compute the loss
            loss.backward()  # backpropagate
            loss.unchain_backward()  # cut the graph so backward() does not re-traverse earlier steps
            opt.clip_grads(10)  # clip overly large gradients
            opt.update()  # update parameters
            x = t
    return h  # final hidden state
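
# A sketch of the more common training pattern (cf. Chainer's ptb example):
# accumulate the loss over several steps and update once per chunk rather than
# once per word. `train_bptt` and `bprop_len` are illustrative names, not part
# of the original paste; any leftover partial chunk at the end is dropped.
def train_bptt(model, text, n_epoch=100, bprop_len=5):
    opt = optimizers.Adam()
    opt.setup(model)
    h = Variable(np.zeros((1, hidden_size), dtype=np.float32))
    accum_loss = Variable(np.zeros((), dtype=np.float32))
    for epoch in range(n_epoch):
        x = text[-1]
        for i, t in enumerate(text):
            loss, h = forward(model, x, t, h)
            accum_loss += loss
            x = t
            if (i + 1) % bprop_len == 0:
                opt.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate the history
                opt.clip_grads(10)
                opt.update()
                accum_loss = Variable(np.zeros((), dtype=np.float32))
    return h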
model = FunctionSet(
    w_xh=Linear(vocab_size, hidden_size),   # input layer (one-hot) -> hidden layer
    w_hh=Linear(hidden_size, hidden_size),  # hidden layer -> hidden layer
    w_hy=Linear(hidden_size, vocab_size),   # hidden layer -> output layer
)
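
# For reference: on Chainer >= 1.5 the FunctionSet API above is deprecated in
# favour of Chain/Link. A roughly equivalent model would look like the sketch
# below (unused by the rest of this paste; requires chainer.links, so it will
# not import on older Chainer versions).
import chainer
import chainer.links as L

class RNNModel(chainer.Chain):
    def __init__(self):
        super(RNNModel, self).__init__(
            w_xh=L.Linear(vocab_size, hidden_size),
            w_hh=L.Linear(hidden_size, hidden_size),
            w_hy=L.Linear(hidden_size, vocab_size),
        )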
start = time.time()
train(model, text)
print(time.time() - start)  # training time in seconds
def get_output(model, word_in, h):  # word_in is a word (str), e.g. a token from MeCab output
    in_id = convert_to_your_word_id(word_in)  # convert the word to its ID
    x = np.zeros((1, vocab_size), dtype=np.float32)
    x[0, in_id] = 1.0  # one-hot input vector
    x = Variable(x)
    h = tanh(model.w_xh(x) + model.w_hh(h))  # update the hidden state
    y = softmax(model.w_hy(h))
    return y.data[0], h  # predicted distribution over the next word
for word in ["a", "b", "c", "d", "e", "f", "g"]:  # query words must appear in the training vocabulary
    myh = Variable(np.zeros((1, hidden_size), dtype=np.float32))  # reset the hidden state for each query word
    output, myh = get_output(model, word, myh)
    print(text_data[int(np.argmax(output))])  # most probable next word
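
# To generate a short sequence rather than single argmax predictions, one
# common variant (a sketch; the seed word and length are assumptions) samples
# the next word from the predicted distribution and feeds it back in.
myh = Variable(np.zeros((1, hidden_size), dtype=np.float32))
word = text[0]  # seed word (assumption: start from the first token)
for _ in range(10):  # generate ten words
    output, myh = get_output(model, word, myh)
    p = output.astype(np.float64)
    p /= p.sum()  # renormalize to guard against float32 rounding
    word = text_data[np.random.choice(vocab_size, p=p)]
    print(word)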