import theanets as tn
import numpy as np
import warnings


def range_sel(validation_factor, n_samples, full_trainset=False):
    """
    Returns a tuple of index arrays for randomly splitting n_samples samples
    into two groups with the given relative sizes. If full_trainset is True,
    the training set will also include the validation set.
    """
    assert 0 <= validation_factor <= 1
    cut = int((1 - validation_factor) * n_samples)
    idx = np.random.permutation(n_samples)
    return (idx if full_trainset else idx[:cut], idx[cut:])

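# Usage sketch: with validation_factor=0.2 and n_samples=10, range_sel
# returns an 8-element training index array and a 2-element validation index
# array (the actual indices vary because the permutation is random):
# train_idx, valid_idx = range_sel(0.2, 10)
# assert len(train_idx) == 8 and len(valid_idx) == 2

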
def makeCustomErrorNet(baseclass, custom_error_fn):

    class CustomErrorNetwork(baseclass):

        """
        Allows custom error functions. custom_error_fn must be a function of
        2 args, self and outputs. Custom error functions may need redefining
        after a theanets upgrade.
        """

        def __init__(self, **kwargs):
            # Pop our keyword first so the theanets constructor never sees it.
            self.custom_error_fn = kwargs.pop('custom_error_fn', custom_error_fn)
            super(CustomErrorNetwork, self).__init__(**kwargs)

        def error(self, outputs):
            return self.custom_error_fn(self, outputs)

    # Return the class itself; theanets instantiates it with the layer
    # parameters in _init_exp().
    return CustomErrorNetwork

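# A sketch of a custom error function (hedged: the exact output key and
# target attribute depend on the theanets version, so check its docs before
# relying on this):
# import theano.tensor as TT
# def mae(net, outputs):
#     # mean absolute error instead of the default mean squared error
#     return TT.mean(abs(outputs[net.output_name()] - net.targets))

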
class FFTheanet(object):

    """Feedforward network."""

    def __init__(self, input_count, hidden_layers, output_count, custom_error_fn=None,
                 nclass=tn.Regressor, **kwargs):
        """
        input_count: Number of inputs.

        hidden_layers: Tuple of layer sizes, or tuple of dicts that specify
                       theanets layer parameters (e.g., {'size': 10,
                       'activation': 'softmax'} makes a layer of 10 softmax
                       neurons; see the theanets docs for more info).

        output_count: Number of outputs.

        custom_error_fn: Custom error function, lambda self, output: error_value.
                         If this is specified, a new class will be created
                         (based on nclass) with the redefined error function.

        nclass: Class of the theanets network. If custom_error_fn is defined,
                it will be the parent of the new class.

        Other named args are passed on to the theanets constructor.
        """
        self._layers = (
            (input_count,) + tuple(x for x in hidden_layers) + (output_count,))
        self._custom_error_fn = custom_error_fn
        self._nclass = nclass
        self._kwargs = kwargs
        self._train_attempts = None
        self._init_exp()

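    # Constructor usage sketch: hidden layers can mix plain sizes and
    # theanets layer dicts, e.g.
    # net = FFTheanet(3, (10, {'size': 5, 'activation': 'softmax'}), 2)
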
    def _init_exp(self):
        """
        Initializes the theanets experiment with the saved parameters.
        """
        nclass = (self._nclass if self._custom_error_fn is None
                  else makeCustomErrorNet(self._nclass, self._custom_error_fn))
        self._tnexp = tn.Experiment(nclass, layers=self._layers, **self._kwargs)
        if self._custom_error_fn is not None:
            self._tnexp.network.custom_error_fn = self._custom_error_fn

    @property
    def input_count(self):
        return self._tnexp.network.layers[0].nout

    @property
    def output_count(self):
        return self._tnexp.network.layers[-1].nout

    def predict(self, inputs, classifier_len=None, classifier_prob=True):
        """
        Returns the network's outputs for the given inputs. If the net is a
        classifier, this returns the classifier_len most probable categories,
        ordered from most to least probable. If classifier_prob is True, the
        returned value also includes the category probabilities and its shape
        is classifier_len * 2 * number_of_samples:
        predict(...)[rank][0]: category indices, one per sample
        predict(...)[rank][1]: category probabilities, one per sample

        inputs: Numpy array of input data, one column per sample.

        classifier_len: Number of the most probable categories to include
                        in the output.

        classifier_prob: Include category probabilities in the output.
        """
        # todo: warn if classifier params are set and this is not a classifier
        if classifier_len is None:
            classifier_len = self.output_count
        # Prediction doesn't mutate the weights!
        # validation
        assert inputs.shape[0] == self.input_count
        outs = np.transpose(self._tnexp.network.predict(np.transpose(inputs)))
        if isinstance(self._tnexp.network, tn.Classifier):
            # Negate outs because argsort goes from min to max and we need
            # the highest probability first.
            inds = [y[:classifier_len] for y in np.argsort(-outs.T)]
            probs = [-x[np.argsort(x)[:classifier_len]] for x in -outs.T]
            return np.array(list(zip(inds, probs)) if classifier_prob else inds).T
        else:
            return outs

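    # Prediction usage sketch (assuming a trained classifier net; the names
    # are illustrative): the two most probable categories for each sample.
    # top2 = net.predict(X, classifier_len=2)
    # top2[0][0][s]  # most probable category index for sample s
    # top2[0][1][s]  # its probability
    # top2[1][0][s]  # second most probable category index for sample s
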
    def _pretrain(self, inputs, targets, validation_factor, full_trainset):
        """
        Validates the data and returns the training and validation sets.

        inputs, targets: Numpy arrays of values.

        validation_factor: Fraction of the data that will be used for
                           validation during training.

        full_trainset: If true, the training set will include the validation
                       set; if false, the training set will be split off.
        """
        if not self._tnexp.network.__module__ == 'theanets.recurrent':
            assert inputs.shape[1] == targets.shape[1]
            assert inputs.shape[0] == self.input_count
            if not isinstance(self._tnexp.network, tn.Classifier):
                assert targets.shape[0] == self.output_count
        # Classifiers take a single vector of integer labels; regressors take
        # one row of target values per sample.
        tgts = targets[0] if isinstance(
            self._tnexp.network, tn.Classifier) else targets.T
        training_index, validation_index = range_sel(
            validation_factor, inputs.shape[1], full_trainset)

        training_set = [np.transpose(inputs[:, training_index]),
                        tgts[training_index]]
        validation_set = [np.transpose(inputs[:, validation_index]),
                          tgts[validation_index]]
        return training_set, validation_set

    def train(self, inputs, targets, validation_factor=0.1, full_trainset=False, train_till_error=None,
              train_attempts=1, retry_with_other_weights=False, error_abs=True, **kwargs):
        """
        Trains on arrays of inputs/targets. Keyword args are passed on to
        theanets for fine tuning of the training process.

        inputs, targets: Numpy arrays of values.

        validation_factor: Fraction of the data that will be used for
                           validation during training.

        full_trainset: If true, the training set will include the validation
                       set; if false, the training set will be split off.

        train_till_error: None, or the error value used as the stopping point
                          for the training process.

        train_attempts: Number of training attempts. The training/validation
                        sets are randomly reshuffled on each attempt.

        retry_with_other_weights: If true, after train_attempts the net will
                                  be reinitialized with different weights and
                                  get train_attempts more attempts if the
                                  target error value has not been reached.
                                  This happens only once, so no infinite
                                  loops here.

        error_abs: If false, train_till_error is treated as a relative error
                   (err / var(targets)). Such a comparison is slow, however,
                   so it is usually better to use a custom error function for
                   handling error values.
        """
        # todo: warn if retry params are set but train_till_error is not
        if retry_with_other_weights and self._train_attempts is None:
            self._train_attempts = train_attempts
        if not retry_with_other_weights:
            self._train_attempts = None
        training, validation = self._pretrain(
            inputs, targets, validation_factor, full_trainset)
        if train_till_error is None:
            return self._tnexp.run(training, validation, **kwargs)
        else:
            if not error_abs:
                var = np.var(training[1])
                print('var = ', var)
            adj_tte = train_till_error if error_abs else train_till_error * var
            ready = False
            i = 0
            for train_mon, valid_mon in self._tnexp.itertrain(training, validation, **kwargs):
                ready = (train_mon['loss'] <= adj_tte)
                i += 1
                if ready:
                    print(i, "iterations till target error, ",
                          train_attempts, "attempts left, ", 'current error', train_mon['loss'])
                    return train_mon
            if train_attempts == 0:
                if retry_with_other_weights:
                    print("Re-initializing net...")
                    self._reinit()
                    return self.train(inputs, targets, validation_factor, full_trainset,
                                      train_till_error, self._train_attempts, retry_with_other_weights=False,
                                      error_abs=error_abs, **kwargs)
                # todo: suggest retrying with other weights if it is possible
                warnings.warn('Could not train till the specified error value. Try supplying more data, training with a different algorithm or specifying a higher error value.',
                              RuntimeWarning)
                return train_mon
            # In the new train() call the datasets are randomly permuted.
            print("Remaining training attempts:",
                  train_attempts - 1, 'iterations:', i, 'current error', train_mon['loss'])
            return self.train(inputs, targets, validation_factor, full_trainset,
                              train_till_error, train_attempts - 1, retry_with_other_weights=retry_with_other_weights,
                              error_abs=error_abs, **kwargs)

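    # Training usage sketch (names are illustrative): train until a loss of
    # 0.05 with up to 3 reshuffled attempts, then once more with fresh
    # weights if the target was not reached.
    # net.train(X, Y, train_till_error=0.05, train_attempts=3,
    #           retry_with_other_weights=True)
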
    def _reinit(self):
        """
        Re-initializes the network for the experiment with the stored
        parameters. This makes the weights random, so any training done
        before will be lost.
        """
        del self._tnexp
        self._init_exp()
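

# A minimal end-to-end sketch (all sizes and data here are made up for
# illustration). Inputs and targets are column-major: one column per sample.
if __name__ == '__main__':
    X = np.random.randn(3, 100)   # 3 inputs, 100 samples
    Y = np.random.randn(2, 100)   # 2 target values per sample
    net = FFTheanet(3, (10,), 2)  # default nclass is tn.Regressor
    net.train(X, Y, validation_factor=0.2)
    print(net.predict(X[:, :5]))  # predictions for the first 5 samples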