Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import theanets as tn
- import numpy as np
- import sys
- from types import MethodType
- import warnings
def range_sel(validation_factor, l, full_trainset=False):
    """
    Randomly split ``l`` sample indices into a training and a validation group.

    validation_factor: Fraction (0..1) of the samples assigned to the
                       validation set.
    l: Total number of samples.
    full_trainset: If True, the training indices cover the whole permutation
                   (i.e. they include the validation samples); the validation
                   indices are still the held-out tail.

    Returns a tuple (training_indices, validation_indices) of numpy arrays.
    """
    # Also reject negative fractions: the original only checked the upper
    # bound, so a negative factor silently produced an empty validation set.
    assert 0 <= validation_factor <= 1
    cut = int((1 - validation_factor) * l)
    idx = np.random.permutation(l)
    return (idx if full_trainset else idx[:cut], idx[cut:])
def makeCustomErrorNet(baseclass, custom_error_fn):
    """
    Build an instance of a dynamically created ``baseclass`` subclass whose
    error function is replaced by ``custom_error_fn``.

    custom_error_fn: Callable of two args, (self, outputs), returning the
                     error value. Custom error functions may need redefining
                     after a theanets upgrade.

    Returns a CustomErrorNetwork instance; callers take ``.__class__`` of the
    result to obtain the generated type.
    """
    class CustomErrorNetwork(baseclass):
        """
        Allows custom error fn-s. custom_error_fn must be a function of 2 args,
        self and outputs.
        """

        def __init__(self, **kwargs):
            # Bug fix: the original did kwargs.get(custom_error_fn, None),
            # using the function *object* as the dict key (always None).
            # Look it up by its keyword name, and pop it so the base-class
            # constructor never receives an unknown keyword argument.
            fn = kwargs.pop('custom_error_fn', None)
            super(CustomErrorNetwork, self).__init__(**kwargs)
            self.custom_error_fn = fn

        def error(self, outputs):
            return self.custom_error_fn(self, outputs)

    return CustomErrorNetwork(custom_error_fn=custom_error_fn)
class FFTheanet(object):
    """Feedforward network wrapper around a theanets Experiment."""

    def __init__(self, input_count, hidden_layers, output_count, custom_error_fn=None,
                 nclass=tn.Regressor, **kwargs):
        """
        input_count: Number of inputs
        hidden_layers: Tuple of layer sizes or tuple of dicts, that specify
                       theanets layer parameters (e.g., {'size': 10, 'activation': 'softmax'}
                       makes a layer of 10 softmax neurons. See theanets docs for more info.)
        output_count: number of outputs
        custom_error_fn: Custom error function, lambda self, output: error_value. If this is
                         specified, a new class will be created (based on nclass) with the
                         redefined error function.
        nclass: Class of theanets network. If custom_error_fn is defined, it will be
                the parent for the new class.
        Other named args are passed through to theanets' constructor.
        """
        self._layers = (
            (input_count, ) + tuple(x for x in hidden_layers) + (output_count,))
        self._custom_error_fn = custom_error_fn
        self._nclass = nclass
        self._kwargs = kwargs
        # Retry budget used by train(); None means "no retry in progress".
        self._train_attempts = None
        self._init_exp()

    def _init_exp(self):
        """
        Initializes the theanets experiment with saved params.
        """
        # If a custom error fn was given, derive the network class from it
        # (the instance returned by makeCustomErrorNet is only used for its
        # __class__) and attach the callable to the created network.
        self._tnexp = tn.Experiment(self._nclass if self._custom_error_fn is None else makeCustomErrorNet(self._nclass, self._custom_error_fn).__class__,
                                    layers=self._layers, **self._kwargs)
        if self._custom_error_fn is not None:
            self._tnexp.network.custom_error_fn = self._custom_error_fn

    @property
    def input_count(self):
        # Size of the first layer of the underlying theanets network.
        return self._tnexp.network.layers[0].nout

    @property
    def output_count(self):
        # Size of the last layer of the underlying theanets network.
        return self._tnexp.network.layers[-1].nout

    def predict(self, inputs, classifier_len=None, clasifier_prob=True):
        """
        Returns the network's outputs for the given inputs. If the net is a classifier, this returns
        classifier_len predictions ordered by probability from max to min. If clasifier_prob is True,
        returned value includes category probabilities and its shape is
        classifier_len * 2 * number_of_experiments.
            predict(...)[experiment_number][0]: values
            predict(...)[experiment_number][1]: probabilities
        inputs: Numpy array of input data (features along axis 0, samples along axis 1)
        classifier_len: Amount of most probable categories that will be included in output
        clasifier_prob: Include category probabilities in output
        """
        # todo: warn if classifier params are set and this is not a classifier
        if classifier_len is None:
            classifier_len = self.output_count
        # Prediction doesn't mutate the weights!
        # validation
        assert inputs.shape[0] == self.input_count
        # theanets expects samples along axis 0, hence the transposes.
        outs = np.transpose(self._tnexp.network.predict(np.transpose(inputs)))
        if isinstance(self._tnexp.network, tn.Classifier):
            # - outs, because argsort goes from min to max and we need highest probability first.
            inds = [y[:classifier_len] for y in np.argsort(-outs.T)]
            probs = [-x[np.argsort(x)[:classifier_len]] for x in -outs.T]
            return np.array(list(zip(inds, probs)) if clasifier_prob else inds).T
        else:
            return outs

    def _prertrain(self, inputs, targets, validation_factor, full_trainset):
        """
        Validates and returns the (training_set, validation_set) lists for training.

        Inputs, targets: Numpy arrays of values.
        validation_factor: Fraction of data that will be used for validation during training
        full_trainset: If true, training set will include the validation set, if false,
                       training set will be split
        """
        # Recurrent networks use differently shaped data, so shape checks
        # only apply to the non-recurrent case.
        if not self._tnexp.network.__module__ == 'theanets.recurrent':
            assert inputs.shape[1] == targets.shape[1]
            assert inputs.shape[0] == self.input_count
            if not isinstance(self._tnexp.network, tn.Classifier):
                assert targets.shape[0] == self.output_count
        # Classifiers take a 1-D vector of class labels; regressors take
        # a (samples, outputs) matrix.
        tgts = targets[0] if isinstance(
            self._tnexp.network, tn.Classifier) else targets.T
        training_index, validation_index = range_sel(
            validation_factor, inputs.shape[1], full_trainset)
        t1 = np.transpose(inputs[:, training_index])
        t2 = (tgts[training_index])
        training_set = [t1, t2]
        validation_set = [np.transpose(inputs[:, validation_index]),
                          (tgts[validation_index])]
        return training_set, validation_set

    def train(self, inputs, targets, validation_factor=0.1, full_trainset=False, train_till_error=None,
              train_attempts=1, retry_with_other_weights=False, error_abs=True, **kwargs):
        """
        Trains by array of in/outputs. Keyworded args can be passed to theanets for fine tuning
        of the training process.

        Inputs, targets: Numpy arrays of values.
        validation_factor: Fraction of data that will be used for validation during training
        full_trainset: If true, training set will include the validation set, if false,
                       training set will be split
        train_till_error: None, or an error value which is determined as the stopping point
                          for the training process.
        train_attempts: Number of training attempts. Data/validation sets are randomly shuffled
        retry_with_other_weights: If true, after train_attempts the net will reinitialize with different
                                  weights and try train_attempts more attempts if target error value is
                                  not achieved. This happens only once, so no infinite loops here.
        error_abs: if false, train_till_error will be treated as relative error (err / var(inputs))
                   However, such comparison is slow, so it is usually better to use custom error
                   functions for handling error values
        """
        # todo: warn if retry params are set, but train_till_error is not
        # Remember the original attempt budget once, so the single
        # re-initialization retry can reuse it.
        if retry_with_other_weights and self._train_attempts is None:
            self._train_attempts = train_attempts
        if not retry_with_other_weights:
            self._train_attempts = None
        training, validation = self._prertrain(
            inputs, targets, validation_factor, full_trainset)
        if train_till_error is None:
            return self._tnexp.run(training, validation, **kwargs)
        else:
            if not error_abs:
                var = np.var(training[1])
                print('var = ', var)
            adj_tte = train_till_error if error_abs else train_till_error * var
            ready = False
            i = 0
            # Renamed from 'train'/'valid' to avoid shadowing this method's
            # own name inside its body.
            for train_metrics, valid_metrics in self._tnexp.itertrain(training, validation, **kwargs):
                ready = (train_metrics['loss'] <= adj_tte)
                i += 1
                if ready:
                    print(i, "iterations till target error, ",
                          train_attempts, "attempts left, ", 'current error', train_metrics['loss'])
                    return train_metrics
            if train_attempts == 0:
                if retry_with_other_weights:
                    print("Re-initializing net...")
                    self._reinit()
                    # Second round with fresh weights; retry flag cleared so
                    # this happens at most once.
                    return self.train(inputs, targets, validation_factor, full_trainset,
                                      train_till_error, self._train_attempts, retry_with_other_weights=False,
                                      error_abs=error_abs, **kwargs)
                # todo: suggest retrying with other weights if it is possible
                warnings.warn('Could not train till specified error value. Try supplying more data, training with a different algorithm or specifying a higher error value',
                              RuntimeWarning)
                return train_metrics
            # In the new train() call the datasets are randomly permuted
            print("Remaining training attempts:",
                  train_attempts - 1, 'iterations:', i, 'current error', train_metrics['loss'])
            return self.train(inputs, targets, validation_factor, full_trainset,
                              train_till_error, train_attempts - 1, retry_with_other_weights=retry_with_other_weights,
                              error_abs=error_abs, **kwargs)

    def _reinit(self):
        """
        Re-initializes the network for the experiment with stored parameters.
        This makes weights random, so any training done before will become useless.
        """
        del self._tnexp
        self._init_exp()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement