import tensorflow as tf


class LSTM_Cell:
    def __init__(self, time_steps=None, input_size=None, cell_size=None, learning_rate=1e-3):
        assert cell_size is not None and input_size is not None
        self.input_size = input_size
        self.cell_size = cell_size
        # Batch-major placeholders: [batch_size, time_steps, input_size].
        self.inputs = tf.placeholder(tf.float32, shape=[None, None, input_size], name="lstm_inputs")
        self.targets = tf.placeholder(tf.float32, shape=[None, None, input_size], name="lstm_targets")
        # Forget gate parameters.
        self.w_f = tf.Variable(tf.random_normal(shape=[self.input_size, self.cell_size]), name="w_f")
        self.u_f = tf.Variable(tf.random_normal(shape=[self.cell_size, self.cell_size]), name="u_f")
        self.b_f = tf.Variable(tf.constant(0., shape=[self.cell_size]), name="b_f")
        # Input gate parameters.
        self.w_i = tf.Variable(tf.random_normal(shape=[self.input_size, self.cell_size]), name="w_i")
        self.u_i = tf.Variable(tf.random_normal(shape=[self.cell_size, self.cell_size]), name="u_i")
        self.b_i = tf.Variable(tf.constant(0., shape=[self.cell_size]), name="b_i")
        # Output gate parameters.
        self.w_o = tf.Variable(tf.random_normal(shape=[self.input_size, self.cell_size]), name="w_o")
        self.u_o = tf.Variable(tf.random_normal(shape=[self.cell_size, self.cell_size]), name="u_o")
        self.b_o = tf.Variable(tf.constant(0., shape=[self.cell_size]), name="b_o")
        # Candidate cell-state parameters.
        self.w_c = tf.Variable(tf.random_normal(shape=[self.input_size, self.cell_size]), name="w_c")
        self.u_c = tf.Variable(tf.random_normal(shape=[self.cell_size, self.cell_size]), name="u_c")
        self.b_c = tf.Variable(tf.constant(0., shape=[self.cell_size]), name="b_c")
        self.learning_rate = learning_rate
        self.outputs = None
        self.last_hidden_state = None
        self.last_cell_state = None
        self.time_steps = time_steps
    def call(self, state_tuple, x):
        """
        One iteration of the LSTM cell.
        params:
            state_tuple: The previous hidden and cell state stacked together (of the shape [2, batch_size, cell_size])
            x: The batch input into the neural network (of the shape [batch_size, input_size])
               Example: [[3, 2, 1], [1, 2, 3]] -> where each row represents an item in the batch
        returns:
            A new state tuple representing the new hidden and cell state.
        """
        previous_hidden, previous_cell_state = tf.unstack(state_tuple)
        # Forget, input and output gates.
        f = tf.nn.sigmoid(tf.matmul(x, self.w_f) + tf.matmul(previous_hidden, self.u_f) + self.b_f)
        i = tf.nn.sigmoid(tf.matmul(x, self.w_i) + tf.matmul(previous_hidden, self.u_i) + self.b_i)
        o = tf.nn.sigmoid(tf.matmul(x, self.w_o) + tf.matmul(previous_hidden, self.u_o) + self.b_o)
        # Candidate cell state from the current input and the previous hidden state.
        cell_state_additions = tf.nn.tanh(tf.matmul(x, self.w_c) + tf.matmul(previous_hidden, self.u_c) + self.b_c)
        cell_state = tf.multiply(cell_state_additions, i) + tf.multiply(previous_cell_state, f)
        # The output gate modulates the tanh of the new cell state.
        new_hidden = tf.multiply(o, tf.nn.tanh(cell_state))
        return tf.stack([new_hidden, cell_state])
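    # For reference, the per-step update implemented by call() above follows the standard
    # LSTM formulation (a sketch of the equations only, no additional behaviour):
    #   f_t  = sigmoid(x_t . W_f + h_{t-1} . U_f + b_f)    forget gate
    #   i_t  = sigmoid(x_t . W_i + h_{t-1} . U_i + b_i)    input gate
    #   o_t  = sigmoid(x_t . W_o + h_{t-1} . U_o + b_o)    output gate
    #   c~_t = tanh(x_t . W_c + h_{t-1} . U_c + b_c)       candidate cell state
    #   c_t  = f_t * c_{t-1} + i_t * c~_t
    #   h_t  = o_t * tanh(c_t)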
    def dynamic_rnn(self, input_sequence=None, dynamic_output=False, initial_state_tuple=None):
        """
        Given a batch-major input sequence, converts it to time major and runs it through
        the LSTM cell step by step.
        params:
            input_sequence: A batch-major input sequence (of the shape [batch_size, sequence_length, input_size])
            initial_state_tuple: A state tuple to start from; if omitted, a zero state is used.
        returns:
            The hidden states and cell states for every batch item at every time step (returned
            batch major, not time major), plus the final state tuple.
        """
        input_sequence = tf.transpose(input_sequence, [1, 0, 2])  # transpose to time major for step-wise scanning
        batch_items = tf.shape(input_sequence)[1]
        if initial_state_tuple is None:
            # Build a zero-filled state tuple sized for this batch.
            initial_state_tuple = self.initial_state_tuple(batch_items)
        # tf.scan applies call() to each time step, threading the state tuple through.
        state_tuples = tf.scan(self.call, input_sequence, initializer=initial_state_tuple)
        # Separate the stacked scan output into hidden-state and cell-state tensors.
        hidden_states, cell_states = self.split_state_tuples(state_tuples)
        # The last element along the time axis holds the final hidden and cell states.
        last_hidden_states, last_cell_states = tf.unstack(state_tuples[-1])
        return hidden_states, cell_states, tf.stack([last_hidden_states, last_cell_states])
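

# A minimal usage sketch, assuming TensorFlow 1.x (tf.placeholder / tf.Session) and that the
# helper methods initial_state_tuple() and split_state_tuples() referenced above are defined
# elsewhere in the class. The sizes and the random input batch are illustrative only.
if __name__ == "__main__":
    import numpy as np

    cell = LSTM_Cell(input_size=3, cell_size=8)
    # Unroll the cell over the batch-major inputs placeholder created in __init__.
    hidden_states, cell_states, last_state = cell.dynamic_rnn(input_sequence=cell.inputs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # One batch of 2 sequences, 5 time steps each, 3 features per step.
        batch = np.random.randn(2, 5, 3).astype(np.float32)
        h, c = sess.run([hidden_states, cell_states], feed_dict={cell.inputs: batch})
        print(h.shape, c.shape)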