Recursive Self Improvement

a guest | Nov 20th, 2015

  1. """
  2. Self-improvement demo.
  3.  
  4. Starts with a random collection of neural net neuron layer sizes and some
  5. random intelligence ratings.
  6.  
  7. Trains each net to reproduce the ratings for all the nets.
  8.  
  9. Assigns prediction accuracy as the new intelligence rating.
  10.  
  11. Then does some GA thingy:
  12.  
  13. Take each net topology
  14.  
  15. Have it rate some modifications of itself.
  16.  
  17. Pick the best ones and add them back to the population.
  18.  
  19. Then calculate actual prediction accuracies and truncation select or something.
  20.  
  21. Then repeat.
  22.  
  23. Doesn't actually work very well. Why?
  24.  
  25. """
  26.  
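# Runs under Python 2 (xrange, itertools.izip) and needs PyBrain
# (https://github.com/pybrain/pybrain).
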
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer

import random
import itertools
import math

# We fix the number of layers
# This is not counting input and output layers
num_layers = 4

# And the pop size
pop_size = 100

# Truncation selection: how many top individuals survive each generation
truncation_count = 10

# How many candidate children each surviving network gets to rate
child_count = 100

# How many children we take from each selected network.
# Needs to be the right number to rebuild the population
child_survival = pop_size / truncation_count
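
# Sanity check: the survivors' children must exactly rebuild the population
assert truncation_count * child_survival == pop_size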

# And the training iterations
training_iterations = 10

# And the generations
generations = 10


# This holds the [net layer sizes list, fitness] lists for individuals
population = []

for i in xrange(pop_size):
    # Fill in the hidden layer sizes for this individual
    layer_sizes = []
    for j in xrange(num_layers):
        # Work out how big the layer should be and add it
        layer_sizes.append(random.randint(1, 10))
    # Give it a random fitness and add it to the population
    population.append([layer_sizes, random.random()])
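
# Each individual is now e.g. [[3, 7, 1, 9], 0.42]: four hidden layer sizes
# plus a random, so far meaningless, intelligence rating.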

print("Made population")

for iteration_number in xrange(generations):

    # Make a dataset of the population
    dataset = SupervisedDataSet(num_layers, 1)
    for net_plan, fitness in population:
        # Add every network plan and its fitness as a training sample:
        # num_layers inputs (the layer sizes) and one fitness target
        dataset.addSample(net_plan, [fitness])
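        # e.g. dataset.addSample([3, 7, 1, 9], [0.42])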

    print("Made dataset")

    # Evaluate all the fitnesses
    fitnesses = []
    # Save the trained networks
    trained = []
    for net_plan, old_fitness in population:
        # Make the network: num_layers inputs, the plan's hidden layers,
        # and one output for the predicted fitness
        net = buildNetwork(*([num_layers] + net_plan + [1]))
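        # e.g. net_plan [3, 7, 1, 9] gives buildNetwork(4, 3, 7, 1, 9, 1)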

        print("Training net {}".format(net_plan))
        # Make the trainer
        trainer = BackpropTrainer(net, dataset)
        # Train it and get the training set error.
        # TODO: use generalization error?
        for i in xrange(training_iterations):
            finalError = trainer.train()

        # New fitness is -log(error): lower error means higher fitness
        fitness = -math.log(finalError)

        print("Trained {} to fitness {}".format(net_plan, fitness))

        fitnesses.append(fitness)
        # Save the trained net
        trained.append(net)

    # Apply the fitnesses so we can sort everything
    marked_population = []
    for new_fitness, net, (net_plan, old_fitness) in itertools.izip(
        fitnesses, trained, population):

        marked_population.append([net_plan, net, new_fitness])

    # Sort by fitness
    population = sorted(marked_population, key=lambda x: x[2], reverse=True)
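    # population is now a list of [net_plan, trained net, new_fitness]
    # triples, best predictor first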

    new_population = []

    # Build a new population
    for i in xrange(truncation_count):
        # Take the top so many and let them choose from random modifications
        # Grab the trained net
        net = population[i][1]
        # And its plan
        net_plan = population[i][0]
        # And its fitness
        fitness = population[i][2]

        print("Making children for {} which scored {}".format(net_plan,
            fitness))

        # holds (rating, child) pairs
        rated_children = []

        # First we just make the children
        children = set()

        for j in xrange(child_count):
            # Make a child
            child = list(net_plan)

            # Make a modification
            child[random.randint(0, len(child) - 1)] += random.randint(-2, 2)

            # Clean up the child
            for k in xrange(len(child)):
                if child[k] <= 0:
                    # Layers must not be empty
                    child[k] = random.randint(1, 10)

            # Add and deduplicate children
            children.add(tuple(child))

        for child in children:
            # Have the parent rate the child
            rating = net.activate(child)[0]
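            # (activate returns a length-1 output array; [0] takes the
            # scalar predicted fitness)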

            rated_children.append([rating, child])

        # Sort children by rating, descending
        rated_children = sorted(rated_children, reverse=True)

        while len(rated_children) < child_survival:
            # Add in more children
            rated_children += rated_children
            # And re-sort
            rated_children = sorted(rated_children, reverse=True)
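        # (deduplication via the set can leave fewer than child_survival
        # unique children; doubling the list pads it out with repeats)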

        # Contribute the top-rated children to the new population
        for j in xrange(child_survival):
            print("\tContributed child: {} with rating {}".format(
                rated_children[j][1], rated_children[j][0]))

            # Train the child on the parents' dataset and calculate its
            # actual fitness
            # Make the network
            child_net = buildNetwork(*([num_layers] + list(rated_children[j][1]) + [1]))

            # Make the trainer
            trainer = BackpropTrainer(child_net, dataset)
            # Train it and get the training set error.
            # TODO: use generalization error?
            for i in xrange(training_iterations):
                finalError = trainer.train()

            fitness = -math.log(finalError)

            print("Trained child to fitness {}".format(fitness))

            # Child's value to be predicted is its performance on the
            # parents' generation
            new_population.append([list(rated_children[j][1]), fitness])

    # Replace the population
    population = new_population
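    # new_population again holds [plan, fitness] pairs, so the next
    # generation can rebuild its training dataset from it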

# Report the final population
print("Final population: {}".format(population))