import numpy as np
import abc
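# A minimal autodiff toy: each Operation stores its input x and an optional
# parameter, f() computes and caches the output in self.z, and
# gradient_by_input / gradient_by_parameter return the Jacobians that are
# multiplied together by the chain rule during backprop.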
class Operation(abc.ABC):
    def __init__(self, x, parameter=None):
        self.x = x
        self.parameter = parameter
        self.z = None

    def f(self):
        self.z = self.o()
        return self.z

    @abc.abstractmethod
    def o(self):
        pass

    def gradient_by_parameter(self):
        pass

    def gradient_by_input(self):
        pass
class Dot(Operation):
    def __init__(self, x, parameter=None):
        super(Dot, self).__init__(x, parameter)

    def o(self):
        return np.dot(self.parameter, self.x)

    def gradient_by_input(self):
        # d(Wx)/dx = W
        return self.parameter

    def gradient_by_parameter(self):
        m = self.parameter.shape[0]
        n = self.parameter.shape[1]
        gradient = np.zeros((m, m, n))
        for i in range(m):
            gradient[i, i] = self.x.ravel()
        return gradient
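# Shape note (my own comment): for W of shape (m, n) and x of shape (n, 1),
# gradient_by_parameter returns an (m, m, n) tensor whose [i, i, :] slice is
# x flattened, i.e. d(Wx)_i / dW_jk = x_k when i == j and 0 otherwise.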
class Add(Operation):
    def __init__(self, x, parameter=None):
        super(Add, self).__init__(x, parameter)

    def o(self):
        return np.add(self.x, self.parameter)

    def gradient_by_input(self):
        n = self.x.shape[0]
        return np.eye(n)

    def gradient_by_parameter(self):
        n = self.x.shape[0]
        return np.eye(n)
class Relu(Operation):
    def __init__(self, x, parameter=None):
        super(Relu, self).__init__(x, parameter)

    def o(self):
        return np.maximum(0, self.x)

    def gradient_by_input(self):
        # Jacobian of an elementwise ReLU is diagonal; the subgradient at 0
        # is taken as 0. A fresh array is built so the cached output self.z
        # is not modified in place.
        return np.diagflat((self.z > 0).astype(float))
class Square(Operation):
    def __init__(self, x, parameter=None):
        super(Square, self).__init__(x, parameter)

    def o(self):
        # x.T x (sum of squares), whose gradient 2 x.T matches
        # gradient_by_input below
        return np.dot(self.x.T, self.x)

    def gradient_by_input(self):
        return 2 * self.x.T
class Sub(Operation):
    def __init__(self, x, parameter=None):
        super(Sub, self).__init__(x, parameter)

    def o(self):
        return np.subtract(self.x, self.parameter)

    def gradient_by_input(self):
        n = self.x.shape[0]
        return np.eye(n)
class CrossEntropyLoss(Operation):
    def __init__(self, x, parameter=None):
        super(CrossEntropyLoss, self).__init__(x, parameter)

    def o(self):
        # binary cross-entropy: targets in self.parameter, predictions in self.x
        return -np.sum(np.multiply(self.parameter, np.log(self.x))
                       + np.multiply(1 - self.parameter, np.log(1 - self.x)))

    def gradient_by_input(self):
        # dL/dx = (x - t) / (x (1 - x))
        return np.divide(np.subtract(self.x, self.parameter),
                         np.multiply(self.x, 1 - self.x))
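# Hedged sketch (my own addition, not part of the original script): a
# finite-difference check for the analytic gradient_by_input Jacobians
# above. `numeric_grad_by_input` is a hypothetical helper name.
def numeric_grad_by_input(op_cls, x, parameter=None, eps=1e-6):
    base = op_cls(x, parameter).f()
    grad = np.zeros((base.size, x.size))
    for j in range(x.size):
        xp = x.astype(float).copy()
        xp.flat[j] += eps
        grad[:, j] = ((op_cls(xp, parameter).f() - base) / eps).ravel()
    return grad

# For example, Add's Jacobian should be the identity:
# np.allclose(numeric_grad_by_input(Add, np.ones((3, 1)), np.zeros((3, 1))), np.eye(3))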
X = np.array([[0, 0], [1, 1], [0, 1], [1, 0]])
Y = np.array([0, 0, 1, 1])
W1 = np.random.random((1, 2))
b1 = np.random.random((1, 1))
b2 = np.random.random((1, 1))
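# Note (my own remark): the model trained below is a single ReLU unit,
# y = relu(W1 x + b1) + b2, fitted to XOR targets with a squared-error
# loss. XOR is not representable by one such unit, so the predictions at
# the end will not match all four targets.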
def forw_back(x, y, W1, b1, b2):
    operations = []
    ## forward
    dot1 = Dot(x, W1)
    z1d = dot1.f()
    operations.append(dot1)
    add1 = Add(z1d, b1)
    z1a = add1.f()
    operations.append(add1)
    relu1 = Relu(z1a)
    y_ = relu1.f()
    operations.append(relu1)
    add2 = Add(y_, b2)
    y_2 = add2.f()
    operations.append(add2)
    print(y, y_2[0])
    ## loss
    sub1 = Sub(y_2, y)
    L = sub1.f()
    operations.append(sub1)
    square1 = Square(L)
    L2 = square1.f()
    operations.append(square1)
    ## backprop: multiply the Jacobians along the chain rule
    L2_g = square1.gradient_by_input()     # dL2/dL
    L_g = sub1.gradient_by_input()         # dL/dy_2
    y_2g = add2.gradient_by_input()        # dy_2/dy_
    y_g = relu1.gradient_by_input()        # dy_/dz1a
    z1a_g = add1.gradient_by_input()       # dz1a/dz1d
    z1a_gb = add1.gradient_by_parameter()  # dz1a/db1
    z1d_g = dot1.gradient_by_parameter()   # dz1d/dW1
    backprop_b = np.dot(np.dot(np.dot(np.dot(L2_g, L_g), y_2g), y_g), z1a_gb)
    backprop_w = np.dot(np.dot(np.dot(np.dot(np.dot(L2_g, L_g), y_2g), y_g), z1a_g), z1d_g)
    ## weight update (b2 is never updated here)
    alpha = 1  # learning rate
    W1 = W1 - alpha * backprop_w
    b1 = b1 - alpha * backprop_b
    # the dot with Dot's (m, m, n) Jacobian leaves a leading axis on
    # backprop_w, so W1[0] restores the (1, 2) shape
    return W1[0], b1

for i in range(10):
    x = X[i % 4, :].reshape(X.shape[1], 1)
    y = Y[i % 4]
    W1, b1 = forw_back(x, y, W1, b1, b2)
    print(W1)
def forw(x, W1, b1, b2):
    # same forward pass as in training, including the final bias
    dot1 = Dot(x, W1)
    z1d = dot1.f()
    add1 = Add(z1d, b1)
    z1a = add1.f()
    relu1 = Relu(z1a)
    y_ = relu1.f()
    add2 = Add(y_, b2)
    return add2.f()

for i in range(4):
    x = X[i, :].reshape(X.shape[1], 1)
    print("x: ", x.T, "y=", forw(x, W1, b1, b2))