# Numpy for matrix math and matplotlib for plotting loss
import numpy as np
import matplotlib.pyplot as plt
# Abstract Base Class
from abc import ABC, abstractmethod

MOMENTUM = 0.9

def l2_loss(y, yhat):
    # Elementwise squared error and its gradient with respect to yhat
    loss_matrix = np.square(yhat - y)
    loss_gradient = 2 * (yhat - y)
    return loss_matrix, loss_gradient
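# Sanity check of l2_loss (a minimal sketch with made-up 1x1 values): for
# y = 0 and yhat = 3 the squared error is 9 and the gradient 2 * (3 - 0) = 6.
_check_loss, _check_grad = l2_loss(np.array([[0.0]]), np.array([[3.0]]))
assert _check_loss[0, 0] == 9.0 and _check_grad[0, 0] == 6.0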
def apply_linear_momentum(prev_momentum, grad_parameter, momentum):
    # Calculate momentum update by the linear momentum method
    assert 0 <= momentum <= 1
    return prev_momentum * momentum + grad_parameter * (1 - momentum)
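# With a constant gradient of 1.0 and momentum 0.9, the velocity approaches
# 1.0 geometrically (0.1, 0.19, 0.271, ...), i.e. an exponential moving
# average of past gradients. A quick illustration:
_v = 0.0
for _ in range(3):
    _v = apply_linear_momentum(_v, 1.0, MOMENTUM)
assert abs(_v - 0.271) < 1e-9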
class Layer(ABC):
    @abstractmethod
    def __init__(self, **kwargs):
        pass

    @abstractmethod
    def forward(self, x):
        # Forward propagate. Remember params needed for backprop
        pass

    @abstractmethod
    def backward(self, out_grad):
        # Return the gradient to the input; store any gradient to parameters
        pass

    def step(self, learning_rate):
        # Default no-op: parameter-free layers need no update
        pass
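# A minimal concrete Layer (a hypothetical identity layer, not used by the
# model below) illustrates the contract: forward returns its input and
# backward passes the upstream gradient straight through.
class Identity(Layer):
    def __init__(self, in_size=None):
        self.out_size = in_size

    def forward(self, x):
        return x

    def backward(self, out_grad):
        return out_grad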
class Lrelu(Layer):
    def __init__(self, input_layer=None, in_size=None):
        # Size passes through unchanged; accept a preceding layer or an explicit size
        self.out_size = input_layer.out_size if input_layer is not None else in_size

    def forward(self, x):
        # Remember the input's sign pattern for backprop
        self.prev_input = x
        return np.maximum(x, x * .1)

    def backward(self, out_grad):
        # Gate on the sign of the forward *input*, not of the upstream gradient
        return np.where(self.prev_input > 0, out_grad, out_grad * .1)
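# Illustrative check (made-up numbers): forward([-2, 3]) -> [-0.2, 3]; with an
# upstream gradient of ones, backward returns [0.1, 1.0], since the slope is
# 0.1 on the negative side and 1 on the positive side.
_lrelu = Lrelu(in_size=2)
assert np.allclose(_lrelu.forward(np.array([-2.0, 3.0])), [-0.2, 3.0])
assert np.allclose(_lrelu.backward(np.array([1.0, 1.0])), [0.1, 1.0])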
class Linear(Layer):
    def __init__(self, input_layer=None, in_size=None, out_size=None):
        self.in_size = input_layer.out_size if input_layer is not None else in_size
        self.out_size = out_size
        self.w = np.random.randn(self.in_size, out_size)
        self.vel = np.zeros((self.in_size, out_size))

    def forward(self, x):
        self.prev_input = x
        return np.matmul(x, self.w)

    def backward(self, out_grad):
        # (in_size, BS) @ (BS, out_size) -> (in_size, out_size)
        raw_grad_w = np.matmul(self.prev_input.T, out_grad)
        # Clip elementwise to stabilize training
        self.grad_w = np.clip(raw_grad_w, -1, 1)
        # (BS, out_size) @ (out_size, in_size) -> (BS, in_size)
        return np.matmul(out_grad, self.w.T)

    def step(self, learning_rate):
        self.vel = apply_linear_momentum(self.vel, self.grad_w, MOMENTUM)
        self.w = self.w - self.vel * learning_rate
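# Finite-difference check of Linear's input gradient (a hypothetical helper,
# not part of training; eps and the tolerance are arbitrary small choices).
def _grad_check_linear(eps=1e-6):
    layer = Linear(in_size=3, out_size=2)
    x = np.random.randn(2, 3)
    layer.forward(x)
    # For f(x) = forward(x).sum(), the upstream gradient is all ones
    grad_x = layer.backward(np.ones((2, 2)))
    x_plus = x.copy()
    x_plus[0, 0] += eps
    numeric = (layer.forward(x_plus).sum() - layer.forward(x).sum()) / eps
    assert abs(numeric - grad_x[0, 0]) < 1e-4

_grad_check_linear()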
class MultiLayerPerceptron:
    def __init__(self, layers, loss_fcn):
        self.layers = layers
        self.loss_fcn = loss_fcn

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, loss_gradient):
        for layer in self.layers[::-1]:
            loss_gradient = layer.backward(loss_gradient)

    def loss(self, y, yhat):
        return self.loss_fcn(y, yhat)

    def step(self, learning_rate):
        for layer in self.layers:
            layer.step(learning_rate)
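# Usage sketch with hypothetical shapes: one forward pass through a tiny
# model. The training loop itself lives in Trainer below.
_demo = MultiLayerPerceptron(
    [Linear(in_size=4, out_size=3), Lrelu(in_size=3), Linear(in_size=3, out_size=2)],
    loss_fcn=l2_loss)
assert _demo.forward(np.random.randn(5, 4)).shape == (5, 2)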
class Trainer:
    def __init__(self, model):
        self.model = model
        self.losses = []
        self.steps = 0

    def optimize(self, x, y, learning_rate):
        model = self.model
        yhat = model.forward(x)
        loss_matrix, loss_gradient = model.loss(y, yhat)
        model.backward(loss_gradient)
        model.step(learning_rate)
        # Per-example L2 norm of the squared errors, averaged over the batch
        loss_rms = np.sqrt(np.square(loss_matrix).sum(1)).mean()
        self.losses.append(loss_rms)

    def train_n_steps(self, n, x, y):
        for _ in range(n):
            self.steps += 1
            # Decaying schedule: lr starts near 1/100 and shrinks as 1/steps
            self.optimize(x, y, learning_rate=1 / (self.steps + 100))

    def visualize(self, skip_first=0):
        plt.plot(self.losses[skip_first:])
        plt.title('Loss for model with momentum, clean software\napproaches ' +
                  str(self.losses[-1])[:5])
        plt.savefig('model_5.jpg')
        plt.show()
if __name__ == "__main__":
    # N is batch size; D_in is input dimension;
    # H is hidden dimension; D_out is output dimension.
    N, D_in, H, D_out = 64, 1000, 100, 10
    # Create random input and output data
    x = np.random.randn(N, D_in)
    y = np.random.randn(N, D_out)
    layers = []
    layers.append(Linear(in_size=D_in, out_size=H))
    layers.append(Lrelu(in_size=H))
    layers.append(Linear(in_size=H, out_size=D_out))
    model = MultiLayerPerceptron(layers, loss_fcn=l2_loss)
    trainer = Trainer(model)
    trainer.train_n_steps(n=500, x=x, y=y)
    trainer.visualize(skip_first=300)
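    # After training, the fitted model can still be queried directly; printing
    # the recorded loss history here is just a convenience.
    print('final loss:', trainer.losses[-1])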