  1. """
  2. Joost van Amersfoort - <joost.van.amersfoort@gmail.com>
  3. Otto Fabius - <ottofabius@gmail.com
  4.  
  5. License: MIT
  6. """

import numpy as np
import theano as th
import theano.tensor as T

  12. """This class implements an auto-encoder with Variational Bayes"""
  13.  
  14. class VA:
  15.     def __init__(self, HU_decoder, HU_encoder, dimX, dimZ, batch_size, L=1, learning_rate=0.01):
        self.HU_decoder = HU_decoder
        self.HU_encoder = HU_encoder

        self.dimX = dimX
        self.dimZ = dimZ
        self.L = L
        self.learning_rate = learning_rate
        self.batch_size = batch_size

        #Standard deviation used to initialize the weights
        self.sigmaInit = 0.01
        self.lowerbound = 0

        #Set to True to model continuous data with a Gaussian decoder
        self.continuous = False


    def initParams(self):
        """Initialize the weights and biases; an extra weight matrix and bias are created when continuous data is modeled"""
        #Encoder: x -> hidden
        W1 = np.random.normal(0,self.sigmaInit,(self.HU_encoder,self.dimX))
        b1 = np.random.normal(0,self.sigmaInit,(self.HU_encoder,1))

        #Encoder: hidden -> mean of z
        W2 = np.random.normal(0,self.sigmaInit,(self.dimZ,self.HU_encoder))
        b2 = np.random.normal(0,self.sigmaInit,(self.dimZ,1))

        #Encoder: hidden -> log variance of z
        W3 = np.random.normal(0,self.sigmaInit,(self.dimZ,self.HU_encoder))
        b3 = np.random.normal(0,self.sigmaInit,(self.dimZ,1))

        #Decoder: z -> hidden
        W4 = np.random.normal(0,self.sigmaInit,(self.HU_decoder,self.dimZ))
        b4 = np.random.normal(0,self.sigmaInit,(self.HU_decoder,1))

        #Decoder: hidden -> reconstruction (mean)
        W5 = np.random.normal(0,self.sigmaInit,(self.dimX,self.HU_decoder))
        b5 = np.random.normal(0,self.sigmaInit,(self.dimX,1))

        if self.continuous:
            #Decoder: hidden -> log variance of the reconstruction
            W6 = np.random.normal(0,self.sigmaInit,(self.dimX,self.HU_decoder))
            b6 = np.random.normal(0,self.sigmaInit,(self.dimX,1))
            self.params = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6]
        else:
            self.params = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5]

        #AdaGrad accumulators, one per parameter
        self.h = [0.01] * len(self.params)


    def initH(self,miniBatch):
        """Compute the gradients and use this to initialize h"""
        totalGradients = self.getGradients(miniBatch)
        for i in xrange(len(totalGradients)):
            self.h[i] += totalGradients[i]*totalGradients[i]

    def createGradientFunctions(self):
        #Create the Theano variables
        W1,W2,W3,W4,W5,W6,x,eps = T.dmatrices("W1","W2","W3","W4","W5","W6","x","eps")

        #Create biases as cols so they can be broadcasted for minibatches
        b1,b2,b3,b4,b5,b6 = T.dcols("b1","b2","b3","b4","b5","b6")

        if self.continuous:
            h_encoder = T.nnet.softplus(T.dot(W1,x) + b1)
        else:
            h_encoder = T.tanh(T.dot(W1,x) + b1)

        mu_encoder = T.dot(W2,h_encoder) + b2
        log_sigma_encoder = 0.5*(T.dot(W3,h_encoder) + b3)

        #Find the hidden variable z
        z = mu_encoder + T.exp(log_sigma_encoder)*eps

        prior = 0.5* T.sum(1 + 2*log_sigma_encoder - mu_encoder**2 - T.exp(2*log_sigma_encoder))

        #Set up decoding layer
        if self.continuous:
            h_decoder = T.nnet.softplus(T.dot(W4,z) + b4)
            mu_decoder = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            log_sigma_decoder = 0.5*(T.dot(W6,h_decoder) + b6)
            logpxz = T.sum(-(0.5 * np.log(2 * np.pi) + log_sigma_decoder) - 0.5 * ((x - mu_decoder) / T.exp(log_sigma_decoder))**2)
            gradvariables = [W1,W2,W3,W4,W5,W6,b1,b2,b3,b4,b5,b6]
        else:
            h_decoder = T.tanh(T.dot(W4,z) + b4)
            y = T.nnet.sigmoid(T.dot(W5,h_decoder) + b5)
            logpxz = -T.nnet.binary_crossentropy(y,x).sum()
            gradvariables = [W1,W2,W3,W4,W5,b1,b2,b3,b4,b5]

        logp = logpxz + prior

        #Compute all the gradients
        derivatives = T.grad(logp,gradvariables)

        #Add the lowerbound so we can keep track of results
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [x,eps], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [x,eps], logp, on_unused_input='ignore')

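    #Note on the objective built above: 'prior' is the negative KL divergence between
    #the approximate posterior q(z|x) = N(mu_encoder, sigma_encoder^2) and the standard
    #normal prior p(z) = N(0, I), which for diagonal Gaussians is
    #    -KL = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    #(here 2*log_sigma_encoder = log(sigma^2)). 'logpxz' is a one-sample estimate of
    #E_q[log p(x|z)] obtained with the reparameterization z = mu + sigma*eps, eps ~ N(0, I),
    #so 'logp' is a stochastic estimate of the variational lower bound being maximized.
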
    def iterate(self, data):
        """Main method, slices data in minibatches and performs an iteration"""
        [N,dimX] = data.shape
        batches = np.arange(0,N,self.batch_size)
        if batches[-1] != N:
            batches = np.append(batches,N)

        #Loop over all minibatches, including the final (possibly smaller) one
        for i in xrange(0,len(batches)-1):
            miniBatch = data[batches[i]:batches[i+1]]
            totalGradients = self.getGradients(miniBatch.T)
            self.updateParams(totalGradients,N,miniBatch.shape[0])

    def getLowerBound(self,data):
        """Use this method for example to compute lower bound on testset"""
        lowerbound = 0
        [N,dimX] = data.shape
        batches = np.arange(0,N,self.batch_size)
        if batches[-1] != N:
            batches = np.append(batches,N)

        for i in xrange(0,len(batches)-1):
            miniBatch = data[batches[i]:batches[i+1]]
            e = np.random.normal(0,1,[self.dimZ,miniBatch.shape[0]])
            lowerbound += self.lowerboundfunction(*(self.params),x=miniBatch.T,eps=e)

        return lowerbound/N


    def getGradients(self,miniBatch):
        """Compute the gradients for one minibatch, summing over L samples of eps"""
        totalGradients = [0] * len(self.params)
        for l in xrange(self.L):
            e = np.random.normal(0,1,[self.dimZ,miniBatch.shape[1]])
            gradients = self.gradientfunction(*(self.params),x=miniBatch,eps=e)
            self.lowerbound += gradients[-1]

            for i in xrange(len(self.params)):
                totalGradients[i] += gradients[i]

        return totalGradients

    def updateParams(self,totalGradients,N,current_batch_size):
        """Update the parameters, taking into account AdaGrad and a prior"""
        for i in xrange(len(self.params)):
            self.h[i] += totalGradients[i]*totalGradients[i]
            #Apply the weight-decay prior to the weight matrices only, not to the biases
            if i < 5 or (i < 6 and len(self.params) == 12):
                prior = 0.5*self.params[i]
            else:
                prior = 0

            #Float division so the prior scaling is not truncated to zero under Python 2
            self.params[i] += self.learning_rate/np.sqrt(self.h[i]) * (totalGradients[i] - prior*(float(current_batch_size)/N))
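

#Usage sketch: trains the model on a hypothetical (N, dimX) array of binary data
#(e.g. binarized MNIST). The layer sizes, epoch count and the random placeholder
#data below are illustrative assumptions, not prescribed by the class.
if __name__ == "__main__":
    np.random.seed(42)
    data = (np.random.rand(1000, 784) > 0.5).astype(np.float64)  #placeholder for real data

    model = VA(HU_decoder=400, HU_encoder=400, dimX=784, dimZ=20, batch_size=100)
    model.initParams()
    model.createGradientFunctions()
    model.initH(data[:model.batch_size].T)  #initH expects a (dimX, batch) minibatch

    for epoch in xrange(5):
        model.lowerbound = 0
        model.iterate(data)
        print "Epoch %d, lower bound per datapoint: %.2f" % (epoch, model.lowerbound / data.shape[0])

    #Normally evaluated on a held-out test set; the training array is reused here for brevity
    print "Lower bound per datapoint: %.2f" % model.getLowerBound(data)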