Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class TNET():
    """
    Target network for DQN-style training.

    Builds a feed-forward graph (two tanh hidden layers, linear output)
    whose Q-value head is used to evaluate the maximum estimated Q-value
    for a given action a. Variables live under the 'tnet' scope so they
    can be collected and overwritten with the online network's weights.
    """

    def __init__(self, in_units, out_units, hidden_units=250):
        # Layer widths: input (state) -> hidden -> hidden -> output (actions).
        self.in_units = in_units
        self.out_units = out_units
        self.hidden_units = hidden_units
        self._model()

    def _model(self):
        """Construct the TF1 graph: placeholder -> tanh -> tanh -> linear Q."""
        with tf.variable_scope('tnet'):
            # Batch of input states; leading dimension left open.
            self.x = tf.placeholder(tf.float32, shape=(None, self.in_units))

            # Affine weights for the three layer-to-layer transforms.
            w_in = tf.get_variable(
                'W1',
                shape=(self.in_units, self.hidden_units),
                initializer=tf.random_normal_initializer())
            w_mid = tf.get_variable(
                'W2',
                shape=(self.hidden_units, self.hidden_units),
                initializer=tf.random_normal_initializer())
            w_out = tf.get_variable(
                'W3',
                shape=(self.hidden_units, self.out_units),
                initializer=tf.random_normal_initializer())

            # Zero-initialized biases for the two hidden layers
            # (the output layer is a pure matmul with no bias).
            bias_h1 = tf.get_variable(
                'b1',
                shape=(self.hidden_units),
                initializer=tf.zeros_initializer())
            bias_h2 = tf.get_variable(
                'b2',
                shape=(self.hidden_units),
                initializer=tf.zeros_initializer())

            # Forward pass producing the Q-value estimates.
            hidden1 = tf.nn.tanh(tf.matmul(self.x, w_in) + bias_h1)
            hidden2 = tf.nn.tanh(tf.matmul(hidden1, w_mid) + bias_h2)
            self.q = tf.matmul(hidden2, w_out)

        # Gather every trainable variable under 'tnet' so the caller can
        # sync this target network from the online network's parameters.
        self.params = tf.get_collection(
            tf.GraphKeys.TRAINABLES_VARIABLES
            if False else tf.GraphKeys.TRAINABLE_VARIABLES,
            scope='tnet')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement