Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __docformat__ = 'restructedtext en'
- import gzip
- import os
- import sys
- import timeit
- import numpy
- import theano
- import theano.tensor as T
class Layer:
    """A single softmax layer.

    Maps its input to class-membership probabilities via
    ``softmax(input . W + b)``.  It is not meant to predict the outcome on
    its own and therefore computes no loss; apply
    ``negative_log_likelihood`` to ``output`` (of the last layer) to get a
    training cost.

    Parameters
    ----------
    input : Theano symbolic variable
        Minibatch of examples, one example per row.
    n_in : int
        Dimensionality of each input example.
    n_out : int
        Number of output classes.
    """

    def __init__(self, input, n_in, n_out):
        # Weight matrix, initialised to zeros, shape (n_in, n_out).
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name="W",
            borrow=True
        )
        # Bias vector, one entry per output unit.
        # BUG FIX: the bias must have shape (n_out,), not (n_in, n_out);
        # a (n_in, n_out) bias mis-broadcasts against T.dot(input, W),
        # which has one row per example, not per input dimension.
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name="b",
            borrow=True
        )
        # Class-membership probabilities, one row per example.
        self.output = T.nnet.softmax(T.dot(input, self.W) + self.b)
        # Trainable parameters of this layer.
        self.params = (self.W, self.b)
        # Keep a handle on the symbolic input for later introspection.
        self.input = input
def y_pred(output):
    """Return the index of the most probable class for each row of *output*."""
    predictions = T.argmax(output, axis=1)
    return predictions
def negative_log_likelihood(output, y):
    """Mean negative log-probability of the correct labels.

    *output* holds per-class probabilities (one row per example) and *y*
    the integer label of each example.
    """
    log_probs = T.log(output)
    correct_log_probs = log_probs[T.arange(y.shape[0]), y]
    return -T.mean(correct_log_probs)
def errors(output, y):
    """Fraction of examples whose predicted class differs from *y*.

    Raises
    ------
    TypeError
        If *y* does not have the same number of dimensions as the
        predictions derived from *output*.
    NotImplementedError
        If *y* does not hold integer labels.
    """
    predictions = y_pred(output)
    # The labels must be comparable element-wise with the predictions.
    if y.ndim != predictions.ndim:
        raise TypeError(
            'y should have the same shape as self.y_pred',
            ('y', y.type, 'y_pred', predictions.type)
        )
    # Only integer labels are supported.
    if not y.dtype.startswith('int'):
        raise NotImplementedError()
    # T.neq yields 1 for every mispredicted example, so the mean of that
    # vector is the misclassification rate.
    return T.mean(T.neq(predictions, y))
# --- Training data: the logical AND of two binary inputs. ---
data_x = numpy.matrix([[0, 0],
                       [1, 0],
                       [0, 1],
                       [1, 1]])
data_y = numpy.array([0,
                      0,
                      0,
                      1])

# Keep the data in shared variables so Theano can place them on the device.
train_set_x = theano.shared(numpy.asarray(data_x,
                                          dtype=theano.config.floatX),
                            borrow=True)
# Labels are stored as floatX and cast back to int32 for indexing.
train_set_y = T.cast(theano.shared(numpy.asarray(data_y,
                                                 dtype=theano.config.floatX),
                                   borrow=True), "int32")

x = T.matrix("x")   # a minibatch of examples, one per row
y = T.ivector("y")  # the corresponding integer labels

# BUG FIX: the labels take two values (0 and 1), so the layer needs
# n_out=2.  With n_out=1 the softmax output is identically 1 and indexing
# column 1 inside negative_log_likelihood is out of bounds.
classifier = Layer(input=x, n_in=2, n_out=2)
cost = negative_log_likelihood(classifier.output, y)

# Gradients of the cost w.r.t. each parameter.
g_W = T.grad(cost=cost, wrt=classifier.W)
g_b = T.grad(cost=cost, wrt=classifier.b)

index = T.lscalar()   # index of the example used as a size-1 minibatch
learning_rate = 0.15

# Plain gradient-descent updates.
updates = [
    (classifier.W, classifier.W - learning_rate * g_W),
    (classifier.b, classifier.b - learning_rate * g_b)
]

# BUG FIX: y is an ivector, so the givens must substitute a vector; the
# scalar train_set_y[index] would be a type mismatch (and y.shape[0] is
# used in the cost).  Slicing index:index+1 keeps x a matrix and y a
# vector of length 1.
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index:index + 1],
        y: train_set_y[index:index + 1]
    }
)

# BUG FIX: errors() is a module-level function, not a method of Layer
# (cf. the free call to negative_log_likelihood above), so it must be
# called as errors(classifier.output, y) — classifier.errors(y) would
# bind the Layer instance to the `output` parameter.
validate_model = theano.function(
    inputs=[index],
    outputs=errors(classifier.output, y),
    givens={
        x: train_set_x[index:index + 1],
        y: train_set_y[index:index + 1]
    }
)

# Error rate on the (1, 1) -> 1 example before and after a few updates.
print(validate_model(3))
for i in range(3):
    train_model(i)
print(validate_model(3))
Add Comment
Please, Sign In to add comment