from dataset.mnist import load_mnist
import numpy as np
from collections import OrderedDict


def softmax(x):
    if x.ndim == 2:
        # Batched input: transpose so each column is one sample.
        x = x.T
        x = x - np.max(x, axis=0)  # subtract the max to avoid overflow
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # overflow countermeasure for a single sample
    return np.exp(x) / np.sum(np.exp(x))

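# Illustrative sanity check: without the max subtraction, np.exp(1000.0)
# overflows to inf; with it, the result stays finite and sums to 1.
#
#   >>> softmax(np.array([1000.0, 1000.0]))
#   array([0.5, 0.5])
#   >>> softmax(np.array([[0.3, 2.9, 4.0]])).sum(axis=1)
#   array([1.])
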
def cross_entropy_error(y, t):
    # Promote a single sample to a batch of one.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If t is one-hot, convert it to class-index form.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]

    # Mean negative log-likelihood of the correct classes; the small
    # constant guards against log(0).
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

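# Worked example (sketch): with a one-hot target selecting class 2,
# the loss reduces to -log of the probability assigned to that class.
#
#   >>> y = np.array([0.1, 0.1, 0.8])
#   >>> t = np.array([0, 0, 1])
#   >>> cross_entropy_error(y, t)   # approx -log(0.8) = 0.223
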
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        # Remember which elements were clipped so backward can reuse it.
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        # Gradient flows only where the forward input was positive.
        dout[self.mask] = 0
        dx = dout

        return dx

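# Mask behaviour (sketch): forward zeroes negative activations, and
# backward blocks the gradient at exactly those positions.
#
#   >>> r = Relu()
#   >>> r.forward(np.array([-2.0, 3.0]))
#   array([0., 3.])
#   >>> r.backward(np.array([5.0, 5.0]))
#   array([0., 5.])
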
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        return dx

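# Shape check (sketch): for x of shape (N, in), W (in, out), b (out,),
# forward returns (N, out); backward returns dx (N, in) and stores
# dW = x.T @ dout of shape (in, out) and db = dout summed over the batch
# axis, matching the derivatives of x @ W + b.
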
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot labels: the fused softmax + cross-entropy gradient
            # is simply (y - t) / batch_size.
            dx = (self.y - self.t) / batch_size
        else:
            # Class-index labels: subtract 1 at each correct class.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx

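# Why (y - t) / batch_size: differentiating the averaged cross entropy
# through the softmax collapses to this form, which is why the two
# operations are fused into one layer.
#
#   >>> layer = SoftmaxWithLoss()
#   >>> layer.forward(np.array([[0.0, 0.0]]), np.array([[1.0, 0.0]]))  # ~0.6931
#   >>> layer.backward()
#   array([[-0.5,  0.5]])
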
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=1.0):
        # Xavier-style initialisation: scale each weight matrix by the
        # square root of its own fan-in.
        self.params = OrderedDict()
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size) / np.sqrt(input_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size)
        self.params['b2'] = np.zeros(output_size)

        # Ordered layers so forward/backward can simply walk the dict.
        self.layers = OrderedDict()
        self.layers['Affine1'] = \
            Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = \
            Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        y = self.predict(x)

        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        return np.sum(y == t) / float(x.shape[0])

    def gradient(self, x, t):
        # Forward pass (each layer caches what backward needs).
        self.loss(x, t)

        # Backward pass in reverse layer order.
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db

        return grads

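# Optional gradient check (a minimal sketch): compare the backprop
# gradients against a central-difference estimate on a tiny batch.
# `numerical_gradient` is a hypothetical helper defined here just for
# this check; nothing below depends on it.
def numerical_gradient(f, x, h=1e-4):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp = x[idx]
        x[idx] = tmp + h
        fxh1 = f()          # f(x + h)
        x[idx] = tmp - h
        fxh2 = f()          # f(x - h)
        x[idx] = tmp        # restore
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        it.iternext()
    return grad

# Usage sketch (commented out; needs the MNIST data loaded below):
# net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# xb, tb = x_train[:3], t_train[:3]
# for key in ('W1', 'b1', 'W2', 'b2'):
#     num = numerical_gradient(lambda: net.loss(xb, tb), net.params[key])
#     print(key, np.max(np.abs(num - net.gradient(xb, tb)[key])))
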
class SGD:
    def __init__(self, lr=0.1):
        self.lr = lr

    def update(self, params, grads):
        # Plain gradient descent: step against the gradient.
        for key in params.keys():
            params[key] -= self.lr * grads[key]

class Momentum:
    def __init__(self, lr=0.1, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        # Lazily allocate one velocity buffer per parameter.
        if self.v is None:
            self.v = {}
            for key, val in params.items():
                self.v[key] = np.zeros_like(val)

        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

class AdaGrad:
    def __init__(self, lr=0.1):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        # Lazily allocate the accumulated squared-gradient buffers.
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

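# Update rules side by side (w: parameter, g: gradient, lr: learning rate):
#   SGD:      w <- w - lr * g
#   Momentum: v <- momentum * v - lr * g;  w <- w + v
#   AdaGrad:  h <- h + g * g;  w <- w - lr * g / (sqrt(h) + 1e-7)
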
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100

train_acc_index = []
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Integer division so the epoch check below compares against an int.
iter_per_epoch = max(train_size // batch_size, 1)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# optimizer = SGD()
optimizer = Momentum()
# optimizer = AdaGrad()

for i in range(iters_num):
    # Sample a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Backprop the gradients, then let the chosen optimizer apply them.
    grad = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate once per epoch.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_index.append(i)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:", i, " train acc:", train_acc,
              " test acc:", test_acc)
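
# The accuracy lists above are collected but never displayed in this
# script; a minimal plotting sketch (assumes matplotlib is installed):
#
#   import matplotlib.pyplot as plt
#   plt.plot(train_acc_index, train_acc_list, label='train acc')
#   plt.plot(train_acc_index, test_acc_list, label='test acc')
#   plt.xlabel('iteration')
#   plt.ylabel('accuracy')
#   plt.legend()
#   plt.show()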