import numpy as np
import matplotlib.pyplot as plt


#############################################
# Utility Functions
#############################################

def scaleSet(X):
    # standardize each feature (row) to zero mean and unit variance;
    # keepdims=True keeps the statistics as (d, 1) columns so they
    # broadcast correctly over the samples for any feature count d
    mu = np.mean(X, axis=1, keepdims=True)
    Sigma = np.std(X, axis=1, keepdims=True)
    X = (X - mu) / Sigma
    return (X, mu, Sigma)

def scaleSetParameter(X, mu, Sigma):
    # apply previously computed scaling parameters (e.g. to a test set)
    X = (X - mu) / Sigma
    return X

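# Usage sketch (illustrative, not part of the original script): scale the
# training set, then reuse the same parameters on unseen data so both sets
# live in the same input space.
#   (Xs, mu, Sigma) = scaleSet(np.array([[1., 2., 3., 4.]]))
#   Xnew = scaleSetParameter(np.array([[2.5, 3.5]]), mu, Sigma)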

#############################################
# Network
#############################################

class AFLinear:
    # identity activation, used for regression output layers
    def h(self, x):
        return x

    def hd(self, x):
        return np.ones(x.shape)

class AFTanh:
    def h(self, x):
        return np.tanh(x)

    def hd(self, x):
        # tanh'(x) = 1 - tanh(x)^2
        z = self.h(x)
        return 1.0 - z**2

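# Sanity check (illustrative sketch, not in the original): the analytic
# derivative hd should agree with the central-difference derivative of h.
# Pass x as an ndarray, e.g. checkActivation(AFTanh(), np.linspace(-2, 2, 9)).
def checkActivation(fcn, x, eps=1e-6):
    numerical = (fcn.h(x + eps) - fcn.h(x - eps)) / (2 * eps)
    return np.max(np.abs(fcn.hd(x) - numerical))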

def fw(x, network):
    # forward pass for a single sample: append the bias input, apply the
    # layer's dropout mask D, then its weights and activation function
    z = x
    for layer in network:
        zd = layer['D'] * np.append(z, 1)
        a = layer['W'].dot(zd)
        z = layer['fcn'].h(a)

    return z

def fwSet(X, network):
    # forward pass for a whole set, one sample per column
    Y = np.zeros((network[-1]['W'].shape[0], X.shape[1]))

    for col in range(X.shape[1]):
        x = X[:, col]
        Y[:, col] = fw(x, network)

    return Y

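# Usage sketch (illustrative, assumes the init() defined further below):
#   net = init([1, 3, 1], [AFTanh(), AFLinear()])
#   Y = fwSet(np.array([[0., 0.5, 1.]]), net)   # Y has shape (1, 3)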

#############################################
# Error functions
#############################################
def ESquaredError(y, t):
    # squared error between output y and target t: ||y - t||^2 / 2
    diff = y - t
    return np.dot(diff, diff) / 2.0


#############################################
# Gradient
#############################################
def zeroGradient(network):
    # one zero matrix per layer, shaped like that layer's weights
    g = []
    for layer in network:
        g.append(np.zeros(layer['W'].shape))

    return g

def centralDifferences(x, t, E, network, eps=1e-8):
    # numerical gradient via central differences: perturb each weight by
    # +/- eps and take (E(w + eps) - E(w - eps)) / (2 * eps); slow, but a
    # useful reference to verify backprop against
    gradient = []

    for layer in network:
        gW = np.zeros(layer['W'].shape)

        for row in range(layer['W'].shape[0]):
            for col in range(layer['W'].shape[1]):
                tmp = layer['W'][row, col]

                layer['W'][row, col] = tmp + eps
                yp = fw(x, network)
                ep = E(yp, t)

                layer['W'][row, col] = tmp - eps
                ym = fw(x, network)
                em = E(ym, t)

                gW[row, col] = (ep - em) / (2 * eps)

                layer['W'][row, col] = tmp

        gradient.append(gW)

    return gradient

def backprop(x, t, E, network):
    # forward pass, remembering activations a and outputs z of every layer
    z = x
    aList = []
    zList = []
    for layer in network:
        zd = layer['D'] * np.append(z, 1) # append bias, apply dropout mask
        a = layer['W'].dot(zd)
        z = layer['fcn'].h(a)

        aList.append(a)
        zList.append(z)

    # deltas, propagated backwards from the output layer
    deltaK = z - t
    deltaList = [deltaK]
    for j in reversed(range(0, len(network) - 1)):

        deltaJ = np.append(network[j]['fcn'].hd(aList[j]), 1) * network[j+1]['W'].T.dot(deltaK)

        deltaK = deltaJ[0:-1] # drop the bias component
        deltaList.append(deltaK)

    deltaList.reverse()

    # gradient per layer: outer product of its delta and its (masked) input
    gradient = []
    z = x
    for i in range(len(network)):
        zd = network[i]['D'] * np.append(z, 1)

        if i < len(network) - 1:
            # hidden units that were dropped out contribute no gradient
            delta = np.multiply(network[i+1]['D'][0:-1], deltaList[i])
        else:
            delta = deltaList[i]

        gW = np.outer(delta, zd)

        z = zList[i]

        gradient.append(gW)

    return gradient

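# Gradient check (illustrative sketch, not part of the original script):
# compares the analytic backprop gradient against central differences on a
# single sample, as the commented-out debugging block in train() below does.
def gradientCheck(x, t, E, network, tol=1e-4):
    gBP = backprop(x, t, E, network)
    gCD = centralDifferences(x, t, E, network)
    for layerIdx in range(len(network)):
        if np.max(np.abs(gBP[layerIdx] - gCD[layerIdx])) > tol:
            return False
    return True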

#############################################
# API
#############################################

def init(nNeurons, fcns):
    # nNeurons lists the layer sizes (input layer first), fcns the
    # activation function object of each weight layer
    nLayer = len(nNeurons) - 1

    network = []

    for i in range(nLayer):
        # one extra input column for the bias
        W = np.random.uniform(-1, 1, (nNeurons[i+1], nNeurons[i] + 1))

        network.append({
            'W': W,
            'fcn': fcns[i],
            'D': np.ones(nNeurons[i] + 1) # dropout mask; all-ones until train() resamples it
        })

    return network


def train(network, X, Y, eta, alpha, E, epochs, batchsize, maxnorms, dropoutP, quiet=False):
    # minibatch gradient descent with weight decay (alpha), per-layer dropout
    # keep-probabilities (dropoutP) and per-layer max-norm constraints (maxnorms)

    for epoch in range(epochs):
        if not quiet:
            print("training epoch %d of %d" % (epoch+1, epochs))

        idxs = np.random.permutation(range(X.shape[1]))
        idx = 0

        while idx < len(idxs):
            gradient = zeroGradient(network)

            for batchIdx in range(batchsize):
                if idx >= len(idxs): # the last batch may be smaller
                    break

                # sample a fresh dropout mask for every layer's inputs
                for layerIdx in range(len(network)):
                    p = dropoutP[layerIdx]
                    size = network[layerIdx]['W'].shape[1]
                    network[layerIdx]['D'] = np.random.binomial(1, p, size)

                sampleIdx = idxs[idx]

                gradientN = backprop(X[:, sampleIdx], Y[:, sampleIdx], E, network)

                #print('--------------')
                #print(X[:, sampleIdx], Y[:, sampleIdx])
                #print('backprop')
                #print(gradientN)
                #print('cd')
                #print(centralDifferences(X[:, sampleIdx], Y[:, sampleIdx], E, network))
                #print('--------------')

                for layerIdx in range(len(gradientN)):
                    gradient[layerIdx] += gradientN[layerIdx]

                idx += 1

            for layerIdx in range(len(network)):
                gradientDescent = eta * (1.0 / batchsize) * gradient[layerIdx]
                weightDecay = eta * alpha * network[layerIdx]['W']

                network[layerIdx]['W'] = network[layerIdx]['W'] - (gradientDescent + weightDecay)

                # apply the max-norm constraint: rescale any weight row whose
                # L2 norm exceeds the layer's limit (0 disables the constraint)
                maxnorm = maxnorms[layerIdx]
                if maxnorm > 0:
                    for wRow in range(network[layerIdx]['W'].shape[0]):
                        wNorm = np.sqrt(np.sum(network[layerIdx]['W'][wRow, :]**2))
                        if wNorm > maxnorm:
                            network[layerIdx]['W'][wRow, :] = (maxnorm / wNorm) * network[layerIdx]['W'][wRow, :]

            #print('network: ', network)

    # rescale the weights by the keep-probabilities so expected activations at
    # test time match training, and reset the dropout masks to all-ones
    for layerIdx in range(len(network)):
        p = dropoutP[layerIdx]
        network[layerIdx]['W'] *= p

        network[layerIdx]['D'] = np.ones(network[layerIdx]['D'].shape)

    return network

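# Usage sketch (illustrative, values chosen for illustration): keep all
# inputs, drop hidden units with probability 0.5, and cap the L2 norm of
# each hidden unit's incoming weight row at 3:
#   net = train(net, X, Y, 0.1, 1e-4, ESquaredError, 50, 10, [3., 0], [1., 0.5])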

def test(network, X, Y, E):
    # evaluate the network: accumulated error, classification accuracy,
    # row-normalized confusion matrix and mean squared error
    nClasses = network[-1]['W'].shape[0]
    nSamples = X.shape[1]

    e = 0
    mse = 0
    nCorrect = 0
    confusion = np.zeros((nClasses, nClasses))

    for col in range(nSamples):
        x = X[:, col]
        t = Y[:, col]
        y = fw(x, network)

        e += E(y, t)
        mse += np.dot(y - t, y - t)

        yIdx = np.argmax(y)
        tIdx = np.argmax(t)

        if yIdx == tIdx:
            nCorrect += 1

        confusion[tIdx, yIdx] += 1 # rows: true class, columns: prediction

    accuracy = float(nCorrect) / float(nSamples)
    mse /= nSamples

    for row in range(confusion.shape[0]):
        confusion[row, :] /= np.sum(confusion[row, :])

    return (e, accuracy, confusion, mse)

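# Usage sketch (illustrative): for a classifier with one-hot target columns,
#   (e, accuracy, confusion, mse) = test(net, Xtest, Ttest, ESquaredError)
#   print("accuracy: %.2f" % accuracy)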


#############################################
# Main Code
#############################################

def sinSet(n, sigma2):
    # n samples of a sine wave on [0, 7], plus Gaussian noise of variance sigma2
    X = np.array([np.linspace(0, 7, n)])
    Y = np.sin(X)

    Yd = Y + np.random.randn(1, X.shape[1]) * np.sqrt(sigma2)
    return (X, Yd, Y)

(Xtrain, Ytrain, YtrainClean) = sinSet(20, 0.05)
print(Xtrain.shape)
print(Ytrain.shape)

(Xtrain, mu, Sigma) = scaleSet(Xtrain)

# one hidden layer of 100 tanh units, linear output for regression
network = init([Xtrain.shape[0], 100, Ytrain.shape[0]], [AFTanh(), AFLinear()])

# eta=0.2, no weight decay, 10 epochs, batches of 5, no max-norm, no dropout
network = train(network, Xtrain, Ytrain, 0.2, 0, ESquaredError, 10, 5, [0., 0], [1., 1])
print(network)

Xtest = np.array([np.linspace(0, 7, 20)])
Xtest = scaleSetParameter(Xtest, mu, Sigma)
Ytest = fwSet(Xtest, network)

# flatten the (1, n) arrays so matplotlib draws connected lines instead of
# treating each column as a separate one-point series
plt.plot(Xtrain[0], Ytrain[0], c='r', marker='o')
plt.plot(Xtrain[0], YtrainClean[0], c='b', marker='o')
plt.plot(Xtest[0], Ytest[0], c='g', marker='x')
plt.show()