import numpy as np
import abc
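# A minimal autodiff toy: each Operation stores its input x and an optional
# parameter, f() computes and caches the output in self.z, and
# gradient_by_input / gradient_by_parameter return the Jacobians that are
# multiplied together by the chain rule during backprop.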
class Operation(abc.ABC):
    def __init__(self, x, parameter=None):
        self.x = x
        self.parameter = parameter
        self.z = None

    def f(self):
        self.z = self.o()
        return self.z

    @abc.abstractmethod
    def o(self):
        pass

    def gradient_by_parameter(self):
        pass

    def gradient_by_input(self):
        pass
class Dot(Operation):
    def __init__(self, x, parameter=None):
        super(Dot, self).__init__(x, parameter)

    def o(self):
        return np.dot(self.parameter, self.x)

    def gradient_by_input(self):
        # d(Wx)/dx = W
        return self.parameter

    def gradient_by_parameter(self):
        m = self.parameter.shape[0]
        n = self.parameter.shape[1]
        gradient = np.zeros((m, m, n))
        for i in range(m):
            gradient[i, i] = self.x.ravel()
        return gradient
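# Shape note (my own comment): for W of shape (m, n) and x of shape (n, 1),
# gradient_by_parameter returns an (m, m, n) tensor whose [i, i, :] slice is
# x flattened, i.e. d(Wx)_i / dW_jk = x_k when i == j and 0 otherwise.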
class Add(Operation):
    def __init__(self, x, parameter=None):
        super(Add, self).__init__(x, parameter)

    def o(self):
        return np.add(self.x, self.parameter)

    def gradient_by_input(self):
        n = self.x.shape[0]
        return np.eye(n)

    def gradient_by_parameter(self):
        n = self.x.shape[0]
        return np.eye(n)
class Relu(Operation):
    def __init__(self, x, parameter=None):
        super(Relu, self).__init__(x, parameter)

    def o(self):
        return np.maximum(0, self.x)

    def gradient_by_input(self):
        # Jacobian of an elementwise ReLU is diagonal; the subgradient at 0
        # is taken as 0. A fresh array is built so the cached output self.z
        # is not modified in place.
        return np.diagflat((self.z > 0).astype(float))
class Square(Operation):
    def __init__(self, x, parameter=None):
        super(Square, self).__init__(x, parameter)

    def o(self):
        # x.T x (sum of squares), whose gradient 2 x.T matches
        # gradient_by_input below
        return np.dot(self.x.T, self.x)

    def gradient_by_input(self):
        return 2 * self.x.T
class Sub(Operation):
    def __init__(self, x, parameter=None):
        super(Sub, self).__init__(x, parameter)

    def o(self):
        return np.subtract(self.x, self.parameter)

    def gradient_by_input(self):
        n = self.x.shape[0]
        return np.eye(n)
class CrossEntropyLoss(Operation):
    def __init__(self, x, parameter=None):
        super(CrossEntropyLoss, self).__init__(x, parameter)

    def o(self):
        # binary cross-entropy: targets in self.parameter, predictions in self.x
        return -np.sum(np.multiply(self.parameter, np.log(self.x))
                       + np.multiply(1 - self.parameter, np.log(1 - self.x)))

    def gradient_by_input(self):
        # dL/dx = (x - t) / (x (1 - x))
        return np.divide(np.subtract(self.x, self.parameter),
                         np.multiply(self.x, 1 - self.x))
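# Hedged sketch (my own addition, not part of the original script): a
# finite-difference check for the analytic gradient_by_input Jacobians
# above. `numeric_grad_by_input` is a hypothetical helper name.
def numeric_grad_by_input(op_cls, x, parameter=None, eps=1e-6):
    base = op_cls(x, parameter).f()
    grad = np.zeros((base.size, x.size))
    for j in range(x.size):
        xp = x.astype(float).copy()
        xp.flat[j] += eps
        grad[:, j] = ((op_cls(xp, parameter).f() - base) / eps).ravel()
    return grad

# For example, Add's Jacobian should be the identity:
# np.allclose(numeric_grad_by_input(Add, np.ones((3, 1)), np.zeros((3, 1))), np.eye(3))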
X = np.array([[0, 0], [1, 1], [0, 1], [1, 0]])
Y = np.array([0, 0, 1, 1])
W1 = np.random.random((1, 2))
b1 = np.random.random((1, 1))
b2 = np.random.random((1, 1))
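# Note (my own remark): the model trained below is a single ReLU unit,
# y = relu(W1 x + b1) + b2, fitted to XOR targets with a squared-error
# loss. XOR is not representable by one such unit, so the predictions at
# the end will not match all four targets.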
def forw_back(x, y, W1, b1, b2):
    operations = []
    ## forward
    dot1 = Dot(x, W1)
    z1d = dot1.f()
    operations.append(dot1)
    add1 = Add(z1d, b1)
    z1a = add1.f()
    operations.append(add1)
    relu1 = Relu(z1a)
    y_ = relu1.f()
    operations.append(relu1)
    add2 = Add(y_, b2)
    y_2 = add2.f()
    operations.append(add2)
    print(y, y_2[0])
    ## loss
    sub1 = Sub(y_2, y)
    L = sub1.f()
    operations.append(sub1)
    square1 = Square(L)
    L2 = square1.f()
    operations.append(square1)
    ## backprop: multiply the Jacobians along the chain rule
    L2_g = square1.gradient_by_input()     # dL2/dL
    L_g = sub1.gradient_by_input()         # dL/dy_2
    y_2g = add2.gradient_by_input()        # dy_2/dy_
    y_g = relu1.gradient_by_input()        # dy_/dz1a
    z1a_g = add1.gradient_by_input()       # dz1a/dz1d
    z1a_gb = add1.gradient_by_parameter()  # dz1a/db1
    z1d_g = dot1.gradient_by_parameter()   # dz1d/dW1
    backprop_b = np.dot(np.dot(np.dot(np.dot(L2_g, L_g), y_2g), y_g), z1a_gb)
    backprop_w = np.dot(np.dot(np.dot(np.dot(np.dot(L2_g, L_g), y_2g), y_g), z1a_g), z1d_g)
    ## weight update (b2 is never updated here)
    alpha = 1  # learning rate
    W1 = W1 - alpha * backprop_w
    b1 = b1 - alpha * backprop_b
    # the dot with Dot's (m, m, n) Jacobian leaves a leading axis on
    # backprop_w, so W1[0] restores the (1, 2) shape
    return W1[0], b1

for i in range(10):
    x = X[i % 4, :].reshape(X.shape[1], 1)
    y = Y[i % 4]
    W1, b1 = forw_back(x, y, W1, b1, b2)
    print(W1)
def forw(x, W1, b1, b2):
    # same forward pass as in training, including the final bias
    dot1 = Dot(x, W1)
    z1d = dot1.f()
    add1 = Add(z1d, b1)
    z1a = add1.f()
    relu1 = Relu(z1a)
    y_ = relu1.f()
    add2 = Add(y_, b2)
    return add2.f()

for i in range(4):
    x = X[i, :].reshape(X.shape[1], 1)
    print("x: ", x.T, "y=", forw(x, W1, b1, b2))