from dataset.mnist import load_mnist
import numpy as np
from collections import OrderedDict
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x -= np.max(x, axis=0)  # subtract per-column max for numerical stability
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x -= np.max(x)
    return np.exp(x) / np.sum(np.exp(x))
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # if the labels are one-hot encoded, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t])) / batch_size
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # one-hot labels
            dx = (self.y - self.t) / batch_size
        else:                           # class-index labels
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=1.0):
        # Xavier-style initialization: scale each layer by 1/sqrt(fan-in)
        self.params = OrderedDict()
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = \
            Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = \
            Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])
    def gradient(self, x, t):
        # forward pass
        self.loss(x, t)

        # backward pass through the layers in reverse order
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads
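
# Optional sanity check (a sketch, not part of the original paste): compare the
# backprop gradients computed by TwoLayerNet.gradient() against a slow central-
# difference estimate. `numerical_gradient` is a hypothetical helper added only
# for this check; the commented usage below assumes a network and a small batch
# already exist.
def numerical_gradient(f, x, eps=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + eps
        fxh1 = f()              # loss with the parameter nudged up
        x[idx] = orig - eps
        fxh2 = f()              # loss with the parameter nudged down
        grad[idx] = (fxh1 - fxh2) / (2 * eps)
        x[idx] = orig           # restore the original value
        it.iternext()
    return grad

# Example usage (slow; use a tiny batch):
#   f = lambda: network.loss(x_batch, t_batch)
#   num_gW1 = numerical_gradient(f, network.params['W1'])
#   bp_gW1 = network.gradient(x_batch, t_batch)['W1']
#   print(np.max(np.abs(num_gW1 - bp_gW1)))
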
class SGD:
    def __init__(self, lr=0.1):
        self.lr = lr

    def update(self, params, grads):
        for key in params.keys():
            params[key] -= self.lr * grads[key]


class Momentum:
    def __init__(self, lr=0.1, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]


class AdaGrad:
    def __init__(self, lr=0.1):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100

train_acc_index = []
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# optimizer = SGD()
optimizer = Momentum()
# optimizer = AdaGrad()

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grad = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_index.append(i)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("epoch:", i, " train acc: ", train_acc,
              " test acc:", test_acc)