import tensorflow as tf

# `config` is expected to come from the surrounding project's configuration module.
class Model(object):
    def __init__(self, _input, is_training, hidden_size, vocab_size, num_layers,
                 dropout=config.trainer.dropout, init_scale=config.trainer.init_scale):
        self.is_training = is_training
        self.input_obj = _input
        self.batch_size = _input.batch_size
        self.num_steps = _input.num_steps
        self.hidden_size = hidden_size
        # create the word embeddings on the CPU
        with tf.device("/cpu:0"):
            randomized = tf.random_uniform([vocab_size, hidden_size], -init_scale, init_scale)
            print("randomized: ", randomized)
            embedding = tf.Variable(randomized)
            inputs = tf.nn.embedding_lookup(embedding, self.input_obj.input_data)
        if is_training and dropout < 1:
            inputs = tf.nn.dropout(inputs, dropout)
        # set up the state storage / extraction: one (c, h) pair per layer
        self.init_state = tf.placeholder(tf.float32, [num_layers, 2, self.batch_size, hidden_size])
        state_per_layer_list = tf.unstack(self.init_state, axis=0)
        rnn_tuple_state = tuple(
            [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1])
             for idx in range(num_layers)])
        # create an LSTM cell to be unrolled; build a fresh cell per layer so the
        # layers do not share variables (reusing one cell object in MultiRNNCell
        # raises an error in recent TF 1.x releases)
        print("Hidden size: ", hidden_size)

        def make_cell():
            lstm = tf.contrib.rnn.LSTMCell(hidden_size, forget_bias=config.trainer.forget_bias)
            # add a dropout wrapper if training
            if is_training and dropout < 1:
                lstm = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=dropout)
            return lstm

        if num_layers > 1:
            cell = tf.contrib.rnn.MultiRNNCell([make_cell() for _ in range(num_layers)], state_is_tuple=True)
        else:
            cell = make_cell()
        print("input: ", inputs)
        output, self.state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32, initial_state=rnn_tuple_state)
        # reshape to (batch_size * num_steps, hidden_size)
        output = tf.reshape(output, [-1, hidden_size])
        softmax_w = tf.Variable(tf.random_uniform([hidden_size, vocab_size], -init_scale, init_scale))
        softmax_b = tf.Variable(tf.random_uniform([vocab_size], -init_scale, init_scale))
        logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        # reshape logits to a 3-D tensor for the sequence loss
        logits = tf.reshape(logits, [self.batch_size, self.num_steps, vocab_size])
        # use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            self.input_obj.targets,
            tf.ones([self.batch_size, self.num_steps], dtype=tf.float32),
            average_across_timesteps=False,
            average_across_batch=True)
        # sum the per-timestep losses into the total cost
        self.cost = tf.reduce_sum(loss)
        # get the prediction accuracy
        self.softmax_out = tf.nn.softmax(tf.reshape(logits, [-1, vocab_size]))
        self.predict = tf.cast(tf.argmax(self.softmax_out, axis=1), tf.int32)
        correct_prediction = tf.equal(self.predict, tf.reshape(self.input_obj.targets, [-1]))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        if not is_training:
            return
        # training ops: gradient-clipped SGD with an externally assignable learning rate
        self.learning_rate = tf.Variable(0.01, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5)
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())
        self.new_lr = tf.placeholder(tf.float32, shape=[])
        self.lr_update = tf.assign(self.learning_rate, self.new_lr)

    def assign_lr(self, session, lr_value):
        session.run(self.lr_update, feed_dict={self.new_lr: lr_value})
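
For reference, here is a minimal usage sketch of the class above. It assumes the project's `config` object is importable (the constructor defaults and `forget_bias` read from it), and it uses a hypothetical `ToyInput` class as a stand-in for the real input pipeline, since Model only needs an object exposing `batch_size`, `num_steps`, `input_data`, and `targets`. The zero array fed into `init_state` and the `assign_lr` call mirror how the graph is wired above; this is a sketch, not the author's training script.

import numpy as np
import tensorflow as tf

class ToyInput(object):
    # hypothetical stand-in for the real input pipeline: constant random batches
    def __init__(self, batch_size, num_steps, vocab_size):
        self.batch_size = batch_size
        self.num_steps = num_steps
        ids = np.random.randint(0, vocab_size, size=(batch_size, num_steps + 1))
        self.input_data = tf.constant(ids[:, :-1], dtype=tf.int32)
        self.targets = tf.constant(ids[:, 1:], dtype=tf.int32)

train_input = ToyInput(batch_size=4, num_steps=10, vocab_size=50)
m = Model(train_input, is_training=True, hidden_size=32,
          vocab_size=50, num_layers=2, dropout=0.5, init_scale=0.05)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    m.assign_lr(sess, 1.0)  # set the starting learning rate
    # zero initial LSTM state, shape (num_layers, 2, batch_size, hidden_size)
    state = np.zeros((2, 2, train_input.batch_size, m.hidden_size))
    for step in range(5):
        cost, _, state = sess.run([m.cost, m.train_op, m.state],
                                  feed_dict={m.init_state: state})
        print(step, cost)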