import tensorflow as tf
from typing import List, Tuple


class ReLULayer(tf.keras.layers.Layer):
    """ classic ReLU function for non-linearity """

    def call(self, inputs):
        """
        :param inputs: outputs from the layer before
        :return: ReLU(inputs)
        """
        # (a) elementwise ReLU, max(0, x), via tf.math
        return tf.math.maximum(0., inputs)
class SoftMaxLayer(tf.keras.layers.Layer):
    """ SoftMax (or SoftArgMax) function to transform logits into probabilities """

    def call(self, inputs):
        """
        :param inputs: outputs from the layer before
        :return: SoftMax(inputs)
        """
        # subtract the per-row maximum before exponentiating; softmax is
        # shift-invariant, and this keeps tf.exp from overflowing
        maxx = tf.math.reduce_max(inputs, keepdims=True, axis=1)
        sinputs = inputs - maxx
        z = tf.math.reduce_sum(tf.math.exp(sinputs), keepdims=True, axis=1)
        return tf.math.divide(tf.math.exp(sinputs), z)
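
# Illustrative check (not in the original paste): every output row is a
# probability distribution, and the max-shift stays stable for large logits:
#   probs = SoftMaxLayer()(tf.constant([[2.0, 1.0, 0.1], [1e3, 1e3, 1e3]]))
#   tf.print(tf.reduce_sum(probs, axis=1))  # -> [1 1], no overflow to inf/nan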
class DenseLayer(tf.keras.layers.Layer):
    """ a fully connected layer """

    def __init__(self, num_neurons: int, use_bias=True):
        """
        :param num_neurons: number of output neurons
        :param use_bias: whether to use a bias term or not
        """
        super().__init__()
        self.num_neurons = num_neurons
        self.bias = use_bias
        self.w = self.b = None

    def build(self, input_shape):
        # (a) create the weight and (optionally) bias variables; `initializer`
        # was never defined in this paste, so Glorot-uniform is assumed here
        initializer = tf.keras.initializers.GlorotUniform()
        self.w = tf.Variable(initializer(shape=[input_shape[-1], self.num_neurons]),
                             trainable=True)
        if self.bias:
            self.b = tf.Variable(initializer(shape=[1, self.num_neurons]),
                                 trainable=True)

    def call(self, inputs):
        # (a) affine transformation: inputs @ w (+ b)
        mul = tf.matmul(inputs, self.w)
        if self.bias:
            mul += self.b
        return mul
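
# Usage note (illustrative, not in the original paste): build() runs lazily on
# the first call, once the input dimensionality is known:
#   layer = DenseLayer(num_neurons=4)
#   out = layer(tf.ones([2, 3]))  # creates w: [3, 4] and b: [1, 4]; out: [2, 4]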
class SequentialModel(tf.keras.layers.Layer):
    """ a sequential model containing other layers """

    def __init__(self, num_neurons: List[int], use_bias=True):
        """
        :param num_neurons: number of output neurons for each DenseLayer
        :param use_bias: whether bias terms should be used or not
        """
        super().__init__()
        self.modules = []
        # (b) interleave DenseLayer and ReLULayer of the given sizes,
        # starting and ending with a DenseLayer
        for i in range(len(num_neurons) - 1):
            self.modules.append(DenseLayer(num_neurons=num_neurons[i],
                                           use_bias=use_bias))
            self.modules.append(ReLULayer())
        self.modules.append(DenseLayer(num_neurons=num_neurons[-1],
                                       use_bias=use_bias))
        # (b) a SoftMaxLayer turns the final logits into probabilities
        self.modules.append(SoftMaxLayer())

    def call(self, inputs):
        # (b) propagate the input sequentially through all layers
        x = inputs
        for module in self.modules:
            x = module(x)
        return x
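
# Example (illustrative, not in the original paste):
#   SequentialModel(num_neurons=[128, 64, 10]) yields
#   Dense(128) -> ReLU -> Dense(64) -> ReLU -> Dense(10) -> SoftMax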
def train_model(model, train_set, eval_set, loss, learning_rate, epochs) -> Tuple[list, list]:
    """
    :param model: a sequential model defining the network
    :param train_set: a tf.data.Dataset providing the training data
    :param eval_set: a tf.data.Dataset providing the evaluation data
    :param loss: the loss function to minimize
    :param learning_rate: learning rate (step size) for stochastic gradient descent
    :param epochs: number of epochs to train
    :return: list of evaluation accuracies and list of train accuracies
    """
    # instantiate an optimizer
    optimizer = tf.keras.optimizers.SGD(learning_rate)
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    train_accuracy_per_epoch = []
    eval_accuracy_per_epoch = []
    # (c) for every batch: compute outputs (here: probabilities) and loss,
    # take gradients, update the model, and update the accuracy metric
    for e in range(epochs):
        train_accuracy.reset_states()
        for batch_input, batch_target in train_set:
            with tf.GradientTape() as t:
                batch_output = model(batch_input)
                batch_loss = loss(batch_target, batch_output)
            weights = []
            for layer in model.modules:
                if type(layer) is DenseLayer:
                    weights.append(layer.w)
                    if layer.b is not None:  # skip the bias when use_bias=False
                        weights.append(layer.b)
            grads = t.gradient(batch_loss, weights)
            optimizer.apply_gradients(zip(grads, weights))
            train_accuracy.update_state(batch_target, batch_output)
        train_accuracy_per_epoch.append(train_accuracy.result())
        eval_accuracy_per_epoch.append(test_model(model, eval_set))
        tf.print("epoch: ", e, "\t train accuracy: ", train_accuracy_per_epoch[-1],
                 "\t eval accuracy: ", eval_accuracy_per_epoch[-1])
    return eval_accuracy_per_epoch, train_accuracy_per_epoch
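

# test_model is called above but never defined in this paste; a minimal sketch,
# assuming it returns the model's accuracy over the evaluation set:
def test_model(model, eval_set) -> float:
    """
    :param model: the model to evaluate
    :param eval_set: a tf.data.Dataset providing the evaluation data
    :return: evaluation accuracy
    """
    eval_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    for batch_input, batch_target in eval_set:
        eval_accuracy.update_state(batch_target, model(batch_input))
    return eval_accuracy.result()


# Hedged end-to-end sketch (not in the original paste): assumes MNIST-style
# 28x28 images flattened to 784 features and sparse integer labels; the
# dataset and hyperparameters below are illustrative, not the assignment's.
if __name__ == "__main__":
    (x_tr, y_tr), (x_te, y_te) = tf.keras.datasets.mnist.load_data()
    x_tr = x_tr.reshape(-1, 784).astype("float32") / 255.0
    x_te = x_te.reshape(-1, 784).astype("float32") / 255.0
    train_set = tf.data.Dataset.from_tensor_slices((x_tr, y_tr)).shuffle(10000).batch(64)
    eval_set = tf.data.Dataset.from_tensor_slices((x_te, y_te)).batch(64)
    model = SequentialModel(num_neurons=[128, 64, 10])
    # the model already outputs probabilities (SoftMaxLayer), so from_logits=False
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    train_model(model, train_set, eval_set, loss, learning_rate=0.1, epochs=5)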