Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class TNET():
    """
    Target network for DQN-style training.

    Builds a feed-forward graph (two tanh hidden layers, linear output)
    whose Q-value head is used to evaluate the maximum estimated Q-value
    for a given action a. Variables live under the 'tnet' scope so they
    can be collected and overwritten with the online network's weights.
    """

    def __init__(self, in_units, out_units, hidden_units=250):
        # Layer widths: input (state) -> hidden -> hidden -> output (actions).
        self.in_units = in_units
        self.out_units = out_units
        self.hidden_units = hidden_units
        self._model()

    def _model(self):
        """Construct the TF1 graph: placeholder -> tanh -> tanh -> linear Q."""
        with tf.variable_scope('tnet'):
            # Batch of input states; leading dimension left open.
            self.x = tf.placeholder(tf.float32, shape=(None, self.in_units))

            # Affine weights for the three layer-to-layer transforms.
            w_in = tf.get_variable(
                'W1',
                shape=(self.in_units, self.hidden_units),
                initializer=tf.random_normal_initializer())
            w_mid = tf.get_variable(
                'W2',
                shape=(self.hidden_units, self.hidden_units),
                initializer=tf.random_normal_initializer())
            w_out = tf.get_variable(
                'W3',
                shape=(self.hidden_units, self.out_units),
                initializer=tf.random_normal_initializer())

            # Zero-initialized biases for the two hidden layers
            # (the output layer is a pure matmul with no bias).
            bias_h1 = tf.get_variable(
                'b1',
                shape=(self.hidden_units),
                initializer=tf.zeros_initializer())
            bias_h2 = tf.get_variable(
                'b2',
                shape=(self.hidden_units),
                initializer=tf.zeros_initializer())

            # Forward pass producing the Q-value estimates.
            hidden1 = tf.nn.tanh(tf.matmul(self.x, w_in) + bias_h1)
            hidden2 = tf.nn.tanh(tf.matmul(hidden1, w_mid) + bias_h2)
            self.q = tf.matmul(hidden2, w_out)

        # Gather every trainable variable under 'tnet' so the caller can
        # sync this target network from the online network's parameters.
        self.params = tf.get_collection(
            tf.GraphKeys.TRAINABLES_VARIABLES
            if False else tf.GraphKeys.TRAINABLE_VARIABLES,
            scope='tnet')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement