Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train a two-layer fully connected classifier (ReLU hidden layer, softmax
# output) with full-batch gradient descent.
#
# Names read from the enclosing scope: np, data, X, Y_, H, C, param_niter,
# param_delta.
# NOTE(review): assumes X is an (N, D) array and Y_ holds N integer class
# labels that data.create_onehot_matrix turns into an (N, C) array — confirm.
W1 = np.random.randn(H, X.shape[1])
b1 = np.random.randn(H)
W2 = np.random.randn(C, H)
b2 = np.random.randn(C)

Y = data.create_onehot_matrix(Y_)
# The one-hot targets never change, so locate the true-class entries once.
positions = np.where(Y == 1)
n_samples = X.shape[0]

for i in range(param_niter):
    # Forward pass: affine -> ReLU -> affine -> softmax.
    s1 = np.dot(X, W1.T) + b1
    h1 = np.maximum(s1, 0)
    # The output layer must consume the ReLU activations h1, not the
    # pre-activations s1; otherwise the model is purely linear and the
    # gradients below do not match the forward computation.
    s2 = np.dot(h1, W2.T) + b2
    probs = data.matrix_stable_softmax(s2)

    # Mean negative log-likelihood of the true class.
    logprobs = np.log(probs[positions])
    loss = -np.mean(logprobs)
    if i % 10 == 0:
        print("iteration {}: loss {}".format(i, loss))

    # Backward pass. The loss is a mean over samples, so the gradient with
    # respect to the scores carries a 1 / n_samples factor.
    Gs2 = (probs - Y) / n_samples
    grad_W2 = np.dot(Gs2.T, h1)
    grad_b2 = np.sum(Gs2, axis=0)
    Gh1 = np.dot(Gs2, W2)
    # ReLU derivative: pass the gradient through only where the
    # pre-activation was strictly positive (elementwise, per sample).
    Gs1 = Gh1 * (s1 > 0)
    grad_W1 = np.dot(Gs1.T, X)
    grad_b1 = np.sum(Gs1, axis=0)

    # Gradient-descent parameter update.
    W2 -= param_delta * grad_W2
    b2 -= param_delta * grad_b2
    W1 -= param_delta * grad_W1
    b1 -= param_delta * grad_b1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement