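# The snippet below looks like the body of a variational autoencoder's constructor:
# it assumes an enclosing class whose __init__ receives D (the input dimensionality)
# and hidden_layer_sizes, and it uses a DenseLayer helper plus the old TF 1.x contrib
# APIs without showing the imports. A minimal sketch of those missing pieces follows,
# under those assumptions; the weight initialization and exact import paths are
# guesses for an older TF 1.x release with contrib, not the author's code.
import numpy as np
import tensorflow as tf

st = tf.contrib.bayesflow.stochastic_tensor
Normal = tf.contrib.distributions.Normal


class DenseLayer(object):
    # fully-connected layer: forward(X) = f(X W + b), with a configurable
    # activation f (an identity lambda is passed in for the "linear" layers below)
    def __init__(self, M1, M2, f=tf.nn.relu):
        self.W = tf.Variable(tf.random_normal(shape=(M1, M2)) * 2 / np.sqrt(M1))
        self.b = tf.Variable(np.zeros(M2).astype(np.float32))
        self.f = f

    def forward(self, X):
        return self.f(tf.matmul(X, self.W) + self.b)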
# represents a batch of training data
self.X = tf.placeholder(tf.float32, shape=(None, D))
# encoder
self.encoder_layers = []
M_in = D
for M_out in hidden_layer_sizes[:-1]:
    h = DenseLayer(M_in, M_out)
    self.encoder_layers.append(h)
    M_in = M_out

# for convenience, we'll refer to the final encoder size as M
# also the input to the decoder size
M = hidden_layer_sizes[-1]
h = DenseLayer(M_in, 2 * M, f=lambda x: x)
self.encoder_layers.append(h)
# get the mean and variance / std dev of Z.
# note that the variance must be > 0
# we can get a sigma (standard dev) > 0 from an unbounded variable by
# passing it through the softplus function.
# add a small amount for smoothing.
current_layer_value = self.X
for layer in self.encoder_layers:
    current_layer_value = layer.forward(current_layer_value)
self.means = current_layer_value[:, :M]
self.stddev = tf.nn.softplus(current_layer_value[:, M:]) + 1e-6

# get a sample of Z
with st.value_type(st.SampleValue()):
    self.Z = st.StochasticTensor(Normal(loc=self.means, scale=self.stddev))
# decoder
self.decoder_layers = []
M_in = M
for M_out in reversed(hidden_layer_sizes[:-1]):
    h = DenseLayer(M_in, M_out)
    self.decoder_layers.append(h)
    M_in = M_out

# the final decoder layer outputs D values with no activation; they are used
# below as the mean of X_hat (and, through softplus, its std dev)
h = DenseLayer(M_in, D, f=lambda x: x)
self.decoder_layers.append(h)
# pass the sampled Z through the decoder to get the parameters of X_hat
current_layer_value = self.Z
for layer in self.decoder_layers:
    current_layer_value = layer.forward(current_layer_value)
logits = current_layer_value
posterior_predictive_logits = logits  # save for later

loc = current_layer_value
scale = tf.nn.softplus(current_layer_value) + 1e-6

# get the output
self.X_hat_distribution = Normal(loc, scale)

# take samples from X_hat
# we will call this the posterior predictive sample
self.posterior_predictive = self.X_hat_distribution.sample()
self.posterior_predictive_probs = tf.nn.relu(logits)
# take a sample of Z ~ N(0, 1)
# and put it through the decoder
# we will call this the prior predictive sample
standard_normal = Normal(
    loc=np.zeros(M, dtype=np.float32),
    scale=np.ones(M, dtype=np.float32)
)
Z_std = standard_normal.sample(1)
current_layer_value = Z_std
for layer in self.decoder_layers:
    current_layer_value = layer.forward(current_layer_value)
logits = current_layer_value
loc = current_layer_value
scale = tf.nn.softplus(current_layer_value) + 1e-6
prior_predictive_dist = Normal(loc, scale)
self.prior_predictive = prior_predictive_dist.sample()
self.prior_predictive_probs = tf.nn.relu(logits)
# prior predictive from input
# only used for generating visualization
self.Z_input = tf.placeholder(tf.float32, shape=(None, M))
current_layer_value = self.Z_input
for layer in self.decoder_layers:
    current_layer_value = layer.forward(current_layer_value)
logits = current_layer_value
self.prior_predictive_from_input_probs = tf.nn.relu(logits)
# now build the cost
kl = tf.reduce_sum(
    tf.contrib.distributions.kl_divergence(self.Z.distribution, standard_normal),
    1
)
expected_log_likelihood = tf.reduce_sum(self.X_hat_distribution.log_prob(self.X), 1)
# expected_log_likelihood = tf.reduce_sum(self.X - self.X_hat_distribution.log_prob(self.X), 1)
# expected_log_likelihood = tf.reduce_sum(tf.pow(self.X - self.posterior_predictive, 2) + 0.01)
self.elbo = tf.reduce_sum(expected_log_likelihood - kl)
self.train_op = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(-self.elbo)
# set up session and variables for later
self.init_op = tf.global_variables_initializer()
self.sess = tf.InteractiveSession()
self.sess.run(self.init_op)
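# A minimal sketch of a training loop against the graph built above, assuming the
# snippet is wrapped in a class and `model` is an instance exposing sess, train_op,
# elbo and X as defined above; fit(), the epoch count and the batch size are
# illustrative assumptions, not part of the paste.
def fit(model, X, epochs=30, batch_sz=64):
    costs = []
    n_batches = len(X) // batch_sz
    for i in range(epochs):
        np.random.shuffle(X)
        for j in range(n_batches):
            batch = X[j * batch_sz:(j + 1) * batch_sz]
            # maximize the ELBO by minimizing its negative (see train_op above)
            _, c = model.sess.run(
                (model.train_op, model.elbo),
                feed_dict={model.X: batch}
            )
            costs.append(c)
    return costs

# after training, prior-predictive samples can be drawn directly, e.g.:
#   sample, probs = model.sess.run((model.prior_predictive, model.prior_predictive_probs))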