import numpy as np
import matplotlib.pyplot as plt

#############################################
# Utility Functions
#############################################

def scaleSet(X):
    # Standardize each feature (row) of X to zero mean and unit variance.
    # keepdims=True keeps mu and Sigma as column vectors so the broadcast
    # is per-row even when X has more than one feature.
    mu = np.mean(X, axis=1, keepdims=True)
    Sigma = np.std(X, axis=1, keepdims=True)
    X = (X - mu) / Sigma
    return (X, mu, Sigma)

def scaleSetParameter(X, mu, Sigma):
    # Apply a standardization fitted on another set (e.g. scale test data
    # with the training set's mu and Sigma).
    X = (X - mu) / Sigma
    return X
#############################################
# Network
#############################################

class AFLinear:
    # Identity activation; hd is its derivative.
    def h(self, x):
        return x
    def hd(self, x):
        return np.ones(x.shape)

class AFTanh:
    # Hyperbolic tangent activation; hd uses tanh'(x) = 1 - tanh(x)^2.
    def h(self, x):
        return np.tanh(x)
    def hd(self, x):
        z = self.h(x)
        return 1.0 - z**2
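# An additional activation following the same h/hd interface, included only
# as an illustration of how the network can be extended; it is not used below.
class AFSigmoid:
    def h(self, x):
        return 1.0 / (1.0 + np.exp(-x))
    def hd(self, x):
        z = self.h(x)
        return z * (1.0 - z)  # sigma'(x) = sigma(x) * (1 - sigma(x))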
def fw(x, network):
    # Forward pass for a single sample. Each layer stores its weights in 'W'
    # (bias column included), its activation object in 'fcn', and a dropout
    # mask in 'D' (all ones outside of training).
    z = x
    for layer in network:
        zd = layer['D'] * np.append(z, 1)  # append bias input, apply dropout mask
        a = layer['W'].dot(zd)
        z = layer['fcn'].h(a)
    return z

def fwSet(X, network):
    # Forward pass for a whole set; samples are the columns of X.
    Y = np.zeros((network[-1]['W'].shape[0], X.shape[1]))
    for col in range(X.shape[1]):
        x = X[:, col]
        Y[:, col] = fw(x, network)
    return Y
#############################################
# Error functions
#############################################

def ESquaredError(y, t):
    # E(y, t) = 0.5 * ||y - t||^2
    diff = y - t
    return np.dot(diff, diff) / 2.0
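# Note: backprop below hardcodes the output delta as y - t, which is the
# derivative of this squared error with respect to the pre-activation of a
# linear output layer. A different error function would need a matching
# output delta in backprop.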
#############################################
# Gradient
#############################################

def zeroGradient(network):
    # One zero matrix per layer, shaped like that layer's weights.
    g = []
    for layer in network:
        g.append(np.zeros(layer['W'].shape))
    return g

def centralDifferences(x, t, E, network, eps=1e-8):
    # Numerical gradient via central differences, perturbing one weight at a
    # time. Slow; intended only for checking backprop. An eps around 1e-5
    # tends to balance truncation against floating-point round-off better
    # than the small default kept here.
    gradient = []
    for layer in network:
        gW = np.zeros(layer['W'].shape)
        for row in range(layer['W'].shape[0]):
            for col in range(layer['W'].shape[1]):
                tmp = layer['W'][row, col]
                layer['W'][row, col] = tmp + eps
                ep = E(fw(x, network), t)
                layer['W'][row, col] = tmp - eps
                em = E(fw(x, network), t)
                gW[row, col] = (ep - em) / (2 * eps)
                layer['W'][row, col] = tmp  # restore the weight
        gradient.append(gW)
    return gradient
def backprop(x, t, E, network):
    # Forward pass, keeping pre-activations (aList) and activations (zList).
    z = x
    aList = []
    zList = []
    for layer in network:
        zd = layer['D'] * np.append(z, 1)  # append bias, apply dropout mask
        a = layer['W'].dot(zd)
        z = layer['fcn'].h(a)
        aList.append(a)
        zList.append(z)
    # Deltas, from the output layer backwards. The output delta y - t assumes
    # the squared error with a linear output layer. The next layer's dropout
    # mask is applied while propagating, so that dropped units pass no
    # gradient to the layers below them.
    deltaK = z - t
    deltaList = [deltaK]
    for j in reversed(range(len(network) - 1)):
        back = network[j+1]['D'] * network[j+1]['W'].T.dot(deltaK)
        deltaJ = np.append(network[j]['fcn'].hd(aList[j]), 1) * back
        deltaK = deltaJ[0:-1]  # drop the bias entry
        deltaList.append(deltaK)
    deltaList.reverse()
    # Weight gradients: outer product of each layer's delta with that layer's
    # masked input, bias included.
    gradient = []
    z = x
    for i in range(len(network)):
        zd = network[i]['D'] * np.append(z, 1)
        gradient.append(np.outer(deltaList[i], zd))
        z = zList[i]
    return gradient
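# Sanity check helper (not called below): compares the analytic gradient from
# backprop against centralDifferences. It resets all dropout masks to ones so
# both paths see the same deterministic network; eps and tol are assumptions
# chosen for this sketch, not part of the original code. Only meaningful with
# E = ESquaredError, since backprop hardcodes that loss's output delta.
def gradientCheck(x, t, E, network, eps=1e-5, tol=1e-6):
    for layer in network:
        layer['D'] = np.ones(layer['W'].shape[1])
    gBP = backprop(x, t, E, network)
    gCD = centralDifferences(x, t, E, network, eps)
    maxDiff = max(np.max(np.abs(b - c)) for b, c in zip(gBP, gCD))
    return maxDiff < tol, maxDiff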
#############################################
# API
#############################################

def init(nNeurons, fcns):
    # nNeurons lists the layer widths, input first; fcns gives one activation
    # object per weight layer. Weights are drawn uniformly from [-1, 1), with
    # one extra column per layer for the bias. The dropout mask 'D' starts as
    # all ones so that fw works before train has ever been called.
    nLayer = len(nNeurons) - 1
    network = []
    for i in range(nLayer):
        W = np.random.uniform(-1, 1, (nNeurons[i+1], nNeurons[i] + 1))
        network.append({
            'W': W,
            'fcn': fcns[i],
            'D': np.ones(nNeurons[i] + 1)
        })
    return network
def train(network, X, Y, eta, alpha, E, epochs, batchsize, maxnorms, dropoutP, quiet=False):
    for epoch in range(epochs):
        if not quiet:
            print("training epoch %d of %d" % (epoch+1, epochs))
        idxs = np.random.permutation(range(X.shape[1]))
        idx = 0
        while idx < len(idxs):
            gradient = zeroGradient(network)
            # The last mini-batch may be smaller if batchsize does not divide
            # the number of samples.
            nBatch = min(batchsize, len(idxs) - idx)
            for batchIdx in range(nBatch):
                # Draw a fresh dropout mask per layer and sample; each entry
                # is 1 with the layer's keep-probability (a probability of 1
                # disables dropout).
                for layerIdx in range(len(network)):
                    p = dropoutP[layerIdx]
                    size = network[layerIdx]['W'].shape[1]
                    network[layerIdx]['D'] = np.random.binomial(1, p, size)
                sampleIdx = idxs[idx]
                # (backprop can be verified against centralDifferences via
                # gradientCheck above)
                gradientN = backprop(X[:, sampleIdx], Y[:, sampleIdx], E, network)
                for layerIdx in range(len(gradientN)):
                    gradient[layerIdx] += gradientN[layerIdx]
                idx += 1
            for layerIdx in range(len(network)):
                # Gradient descent step on the mini-batch average, plus L2
                # weight decay with strength alpha.
                gradientDescent = eta * (1.0 / nBatch) * gradient[layerIdx]
                weightDecay = eta * alpha * network[layerIdx]['W']
                network[layerIdx]['W'] = network[layerIdx]['W'] - (gradientDescent + weightDecay)
                # Apply maxnorm: rescale any weight row whose norm exceeds the
                # layer's limit (a limit of 0 disables the constraint).
                maxnorm = maxnorms[layerIdx]
                if maxnorm > 0:
                    for wRow in range(network[layerIdx]['W'].shape[0]):
                        wNorm = np.sqrt(np.sum(network[layerIdx]['W'][wRow, :]**2))
                        if wNorm > maxnorm:
                            network[layerIdx]['W'][wRow, :] = (maxnorm / wNorm) * network[layerIdx]['W'][wRow, :]
    # After training, scale the weights by the keep-probabilities and reset
    # the masks to ones, so that fw approximates the dropout ensemble average.
    for layerIdx in range(len(network)):
        p = dropoutP[layerIdx]
        network[layerIdx]['W'] *= p
        network[layerIdx]['D'] = np.ones(network[layerIdx]['D'].shape)
    return network
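# Note: scaling the trained weights by p matches the weight-scaling rule of
# the original dropout scheme (Srivastava et al., 2014). A common alternative,
# "inverted dropout", divides the masked activations by p during training
# instead, so the weights need no rescaling at test time.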
def test(network, X, Y, E):
    nClasses = network[-1]['W'].shape[0]
    nSamples = X.shape[1]
    e = 0
    mse = 0
    nCorrect = 0
    confusion = np.zeros((nClasses, nClasses))
    for col in range(nSamples):
        x = X[:, col]
        t = Y[:, col]
        y = fw(x, network)
        e += E(y, t)
        mse += np.dot(y - t, y - t)  # squared error of this sample
        tIdx = np.argmax(t)  # true class
        yIdx = np.argmax(y)  # predicted class
        if tIdx == yIdx:
            nCorrect += 1
        confusion[tIdx, yIdx] += 1  # rows: true class, columns: prediction
    mse /= nSamples
    accuracy = float(nCorrect) / float(nSamples)
    # Normalize each confusion row to per-class rates, skipping empty rows.
    for row in range(confusion.shape[0]):
        rowSum = np.sum(confusion[row, :])
        if rowSum > 0:
            confusion[row, :] /= rowSum
    return (e, accuracy, confusion, mse)
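# test() is classification-oriented: accuracy and the confusion matrix compare
# argmax over the outputs against argmax over the targets. For a regression
# task with a single output, as in the demo below, only e and mse are
# meaningful.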
#############################################
# Main Code
#############################################

def sinSet(n, sigma2):
    # n points on a sine curve over [0, 7], plus Gaussian noise with
    # variance sigma2; also returns the clean targets for comparison.
    X = np.array([np.linspace(0, 7, n)])
    Y = np.sin(X)
    Yd = Y + np.random.randn(1, X.shape[1]) * np.sqrt(sigma2)
    return (X, Yd, Y)

(Xtrain, Ytrain, YtrainClean) = sinSet(20, 0.05)
print(Xtrain.shape)
print(Ytrain.shape)
(Xtrain, mu, Sigma) = scaleSet(Xtrain)

# One hidden layer of 100 tanh units, linear output. maxnorms [0, 0] disables
# the norm constraint and dropoutP [1, 1] disables dropout for this demo.
network = init([Xtrain.shape[0], 100, Ytrain.shape[0]], [AFTanh(), AFLinear()])
network = train(network, Xtrain, Ytrain, 0.2, 0, ESquaredError, 10, 5, [0., 0.], [1., 1.])
print(network)

Xtest = np.array([np.linspace(0, 7, 20)])
Xtest = scaleSetParameter(Xtest, mu, Sigma)
Ytest = fwSet(Xtest, network)

# Flatten the (1, n) arrays so matplotlib draws connected lines.
plt.plot(Xtrain[0], Ytrain[0], c='r', marker='o', label='noisy training data')
plt.plot(Xtrain[0], YtrainClean[0], c='b', marker='o', label='clean sine')
plt.plot(Xtest[0], Ytest[0], c='g', marker='x', label='network output')
plt.legend()
plt.show()