import theanets as tn
import numpy as np
import warnings


def range_sel(validation_factor, n_samples, full_trainset=False):
    """
    Returns a tuple of index arrays for randomly splitting n_samples samples
    into two groups with the given relative sizes. If full_trainset is True,
    the training set will also include the validation set.
    """
    assert 0 <= validation_factor <= 1
    cut = int((1 - validation_factor) * n_samples)
    idx = np.random.permutation(n_samples)
    return (idx if full_trainset else idx[:cut], idx[cut:])

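# Usage sketch: with validation_factor=0.2 and n_samples=10, range_sel
# returns an 8-element training index array and a 2-element validation index
# array (the actual indices vary because the permutation is random):
# train_idx, valid_idx = range_sel(0.2, 10)
# assert len(train_idx) == 8 and len(valid_idx) == 2

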
def makeCustomErrorNet(baseclass, custom_error_fn):

    class CustomErrorNetwork(baseclass):

        """
        Allows custom error functions. custom_error_fn must be a function of
        2 args, self and outputs. Custom error functions may need redefining
        after a theanets upgrade.
        """

        def __init__(self, **kwargs):
            # Pop our keyword first so the theanets constructor never sees it.
            self.custom_error_fn = kwargs.pop('custom_error_fn', custom_error_fn)
            super(CustomErrorNetwork, self).__init__(**kwargs)

        def error(self, outputs):
            return self.custom_error_fn(self, outputs)

    # Return the class itself; theanets instantiates it with the layer
    # parameters in _init_exp().
    return CustomErrorNetwork

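# A sketch of a custom error function (hedged: the exact output key and
# target attribute depend on the theanets version, so check its docs before
# relying on this):
# import theano.tensor as TT
# def mae(net, outputs):
#     # mean absolute error instead of the default mean squared error
#     return TT.mean(abs(outputs[net.output_name()] - net.targets))

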
class FFTheanet(object):

    """Feedforward network."""

    def __init__(self, input_count, hidden_layers, output_count, custom_error_fn=None,
                 nclass=tn.Regressor, **kwargs):
        """
        input_count: Number of inputs.

        hidden_layers: Tuple of layer sizes, or tuple of dicts that specify
                       theanets layer parameters (e.g., {'size': 10,
                       'activation': 'softmax'} makes a layer of 10 softmax
                       neurons; see the theanets docs for more info).

        output_count: Number of outputs.

        custom_error_fn: Custom error function, lambda self, output: error_value.
                         If this is specified, a new class will be created
                         (based on nclass) with the redefined error function.

        nclass: Class of the theanets network. If custom_error_fn is defined,
                it will be the parent of the new class.

        Other named args are passed on to the theanets constructor.
        """
        self._layers = (
            (input_count,) + tuple(x for x in hidden_layers) + (output_count,))
        self._custom_error_fn = custom_error_fn
        self._nclass = nclass
        self._kwargs = kwargs
        self._train_attempts = None
        self._init_exp()

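    # Constructor usage sketch: hidden layers can mix plain sizes and
    # theanets layer dicts, e.g.
    # net = FFTheanet(3, (10, {'size': 5, 'activation': 'softmax'}), 2)
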
    def _init_exp(self):
        """
        Initializes the theanets experiment with the saved parameters.
        """
        nclass = (self._nclass if self._custom_error_fn is None
                  else makeCustomErrorNet(self._nclass, self._custom_error_fn))
        self._tnexp = tn.Experiment(nclass, layers=self._layers, **self._kwargs)
        if self._custom_error_fn is not None:
            self._tnexp.network.custom_error_fn = self._custom_error_fn

    @property
    def input_count(self):
        return self._tnexp.network.layers[0].nout

    @property
    def output_count(self):
        return self._tnexp.network.layers[-1].nout

    def predict(self, inputs, classifier_len=None, classifier_prob=True):
        """
        Returns the network's outputs for the given inputs. If the net is a
        classifier, this returns the classifier_len most probable categories,
        ordered from most to least probable. If classifier_prob is True, the
        returned value also includes the category probabilities and its shape
        is classifier_len * 2 * number_of_samples:
        predict(...)[rank][0]: category indices, one per sample
        predict(...)[rank][1]: category probabilities, one per sample

        inputs: Numpy array of input data, one column per sample.

        classifier_len: Number of the most probable categories to include
                        in the output.

        classifier_prob: Include category probabilities in the output.
        """
        # todo: warn if classifier params are set and this is not a classifier
        if classifier_len is None:
            classifier_len = self.output_count
        # Prediction doesn't mutate the weights!
        # validation
        assert inputs.shape[0] == self.input_count
        outs = np.transpose(self._tnexp.network.predict(np.transpose(inputs)))
        if isinstance(self._tnexp.network, tn.Classifier):
            # Negate outs because argsort goes from min to max and we need
            # the highest probability first.
            inds = [y[:classifier_len] for y in np.argsort(-outs.T)]
            probs = [-x[np.argsort(x)[:classifier_len]] for x in -outs.T]
            return np.array(list(zip(inds, probs)) if classifier_prob else inds).T
        else:
            return outs

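    # Prediction usage sketch (assuming a trained classifier net; the names
    # are illustrative): the two most probable categories for each sample.
    # top2 = net.predict(X, classifier_len=2)
    # top2[0][0][s]  # most probable category index for sample s
    # top2[0][1][s]  # its probability
    # top2[1][0][s]  # second most probable category index for sample s
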
    def _pretrain(self, inputs, targets, validation_factor, full_trainset):
        """
        Validates the data and returns the training and validation sets.

        inputs, targets: Numpy arrays of values.

        validation_factor: Fraction of the data that will be used for
                           validation during training.

        full_trainset: If true, the training set will include the validation
                       set; if false, the training set will be split off.
        """
        if not self._tnexp.network.__module__ == 'theanets.recurrent':
            assert inputs.shape[1] == targets.shape[1]
            assert inputs.shape[0] == self.input_count
            if not isinstance(self._tnexp.network, tn.Classifier):
                assert targets.shape[0] == self.output_count
        # Classifiers take a single vector of integer labels; regressors take
        # one row of target values per sample.
        tgts = targets[0] if isinstance(
            self._tnexp.network, tn.Classifier) else targets.T
        training_index, validation_index = range_sel(
            validation_factor, inputs.shape[1], full_trainset)

        training_set = [np.transpose(inputs[:, training_index]),
                        tgts[training_index]]
        validation_set = [np.transpose(inputs[:, validation_index]),
                          tgts[validation_index]]
        return training_set, validation_set

    def train(self, inputs, targets, validation_factor=0.1, full_trainset=False, train_till_error=None,
              train_attempts=1, retry_with_other_weights=False, error_abs=True, **kwargs):
        """
        Trains on arrays of inputs/targets. Keyword args are passed on to
        theanets for fine tuning of the training process.

        inputs, targets: Numpy arrays of values.

        validation_factor: Fraction of the data that will be used for
                           validation during training.

        full_trainset: If true, the training set will include the validation
                       set; if false, the training set will be split off.

        train_till_error: None, or the error value used as the stopping point
                          for the training process.

        train_attempts: Number of training attempts. The training/validation
                        sets are randomly reshuffled on each attempt.

        retry_with_other_weights: If true, after train_attempts the net will
                                  be reinitialized with different weights and
                                  get train_attempts more attempts if the
                                  target error value has not been reached.
                                  This happens only once, so no infinite
                                  loops here.

        error_abs: If false, train_till_error is treated as a relative error
                   (err / var(targets)). Such a comparison is slow, however,
                   so it is usually better to use a custom error function for
                   handling error values.
        """
        # todo: warn if retry params are set but train_till_error is not
        if retry_with_other_weights and self._train_attempts is None:
            self._train_attempts = train_attempts
        if not retry_with_other_weights:
            self._train_attempts = None
        training, validation = self._pretrain(
            inputs, targets, validation_factor, full_trainset)
        if train_till_error is None:
            return self._tnexp.run(training, validation, **kwargs)
        else:
            if not error_abs:
                var = np.var(training[1])
                print('var = ', var)
            adj_tte = train_till_error if error_abs else train_till_error * var
            ready = False
            i = 0
            for train_mon, valid_mon in self._tnexp.itertrain(training, validation, **kwargs):
                ready = (train_mon['loss'] <= adj_tte)
                i += 1
                if ready:
                    print(i, "iterations till target error, ",
                          train_attempts, "attempts left, ", 'current error', train_mon['loss'])
                    return train_mon
            if train_attempts == 0:
                if retry_with_other_weights:
                    print("Re-initializing net...")
                    self._reinit()
                    return self.train(inputs, targets, validation_factor, full_trainset,
                                      train_till_error, self._train_attempts, retry_with_other_weights=False,
                                      error_abs=error_abs, **kwargs)
                # todo: suggest retrying with other weights if it is possible
                warnings.warn('Could not train till the specified error value. Try supplying more data, training with a different algorithm or specifying a higher error value.',
                              RuntimeWarning)
                return train_mon
            # In the new train() call the datasets are randomly permuted.
            print("Remaining training attempts:",
                  train_attempts - 1, 'iterations:', i, 'current error', train_mon['loss'])
            return self.train(inputs, targets, validation_factor, full_trainset,
                              train_till_error, train_attempts - 1, retry_with_other_weights=retry_with_other_weights,
                              error_abs=error_abs, **kwargs)

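    # Training usage sketch (names are illustrative): train until a loss of
    # 0.05 with up to 3 reshuffled attempts, then once more with fresh
    # weights if the target was not reached.
    # net.train(X, Y, train_till_error=0.05, train_attempts=3,
    #           retry_with_other_weights=True)
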
    def _reinit(self):
        """
        Re-initializes the network for the experiment with the stored
        parameters. This makes the weights random, so any training done
        before will be lost.
        """
        del self._tnexp
        self._init_exp()
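

# A minimal end-to-end sketch (all sizes and data here are made up for
# illustration). Inputs and targets are column-major: one column per sample.
if __name__ == '__main__':
    X = np.random.randn(3, 100)   # 3 inputs, 100 samples
    Y = np.random.randn(2, 100)   # 2 target values per sample
    net = FFTheanet(3, (10,), 2)  # default nclass is tn.Regressor
    net.train(X, Y, validation_factor=0.2)
    print(net.predict(X[:, :5]))  # predictions for the first 5 samples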