import numpy as np
import h5py
# import lasagne
import sys
import copy
sys.path.append('../')
from model.ModelUtil import norm_state, scale_state, norm_action, scale_action, action_bound_std, scale_reward, norm_reward
from algorithm.AlgorithmInterface import AlgorithmInterface
from model.LearningUtil import loglikelihood_keras, likelihood_keras, kl_keras, kl_D_keras, entropy_keras
from keras.optimizers import SGD
# from keras.utils.np_utils import to_categorical
import keras.backend as K
import keras
from keras.models import Sequential, Model

# For debugging
# theano.config.mode='FAST_COMPILE'
# from DeepCACLA import DeepCACLA

"""
def dice_coef(y_true, y_pred, smooth, thresh):
    y_pred = y_pred > thresh
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)

    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

### Loss
def dice_loss(smooth, thresh):
    def dice(y_true, y_pred):
        return -dice_coef(y_true, y_pred, smooth, thresh)
    return dice
"""

  35.  
  36. def flatten(data):
  37.  
  38. for i in data:
  39. if isinstance(i, (list, tuple, np.ndarray)):
  40. for j in flatten(i):
  41. yield j
  42. else:
  43. yield i
  44.  
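# Example usage of flatten() (illustrative only; the nested input below is made up):
# >>> list(flatten([[1, 2], (3, [4, 5]), np.array([6.0])]))
# [1, 2, 3, 4, 5, 6.0]
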
def getOptimizer(lr, settings):
    """
    Function to make it easier to select the SGD optimizer to use
    """
    if ("optimizer" in settings
            and (settings["optimizer"] == "sgd")):
        sgd = keras.optimizers.SGD(lr=lr, momentum=settings["rho"], decay=0.0, nesterov=False)
    else:
        sgd = keras.optimizers.Adam(lr=np.float32(lr),
                                    beta_1=settings["rho"], beta_2=np.float32(0.999),
                                    epsilon=np.float32(settings["rms_epsilon"]), decay=0.0,
                                    amsgrad=False)
    return sgd

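# Example of selecting an optimizer from a settings dictionary (the specific
# values below are illustrative assumptions, not taken from a real config):
# settings_example = {"optimizer": "sgd", "rho": 0.9, "rms_epsilon": 1e-6}
# opt = getOptimizer(lr=0.001, settings=settings_example)  # -> Keras SGD with momentum 0.9
# Omitting "optimizer" (or giving any other value) falls back to Adam with
# beta_1 = settings["rho"] and epsilon = settings["rms_epsilon"].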

class KERASAlgorithm(AlgorithmInterface):

    def __init__(self, model, n_in, n_out, state_bounds, action_bounds, reward_bound, settings_, print_info=False):

        super(KERASAlgorithm, self).__init__(model, n_in, n_out, state_bounds, action_bounds, reward_bound, settings_, print_info=False)

    def getGrads(self, states, alreadyNormed=False):
        """
        The states should be normalized
        """
        # self.setData(states, actions, rewards, result_states)
        if (alreadyNormed == False):
            states = norm_state(states, self._state_bounds)
        states = np.array(states, dtype=self._settings['float_type'])
        # grads = np.reshape(np.array(self._get_gradients([states])[0], dtype=self._settings['float_type']), (states.shape[0], states.shape[1]))
        grads = np.array(self._get_gradients([states, 0]), dtype=self._settings['float_type'])
        # print ("State grads: ", grads.shape)
        # print ("State grads: ", repr(grads))
        return grads

    def reset(self):
        """
        Reset any state for the agent model
        """
        self._model.reset()
        if not (self._modelTarget is None):
            self._modelTarget.reset()

    def setData(self, states, actions, rewards, result_states, fallen):
        pass

    def updateTargetModel(self):
        """
        Target model updates
        """
        if (self.getSettings()["print_levels"][self.getSettings()["print_level"]] >= self.getSettings()["print_levels"]['train']):
            print ("Updating target Model")
        self._modelTarget.getCriticNetwork().set_weights(copy.deepcopy(self._model.getCriticNetwork().get_weights()))
        self._modelTarget.getActorNetwork().set_weights(copy.deepcopy(self._model.getActorNetwork().get_weights()))

    def printWeights(self):
        print ("Critic weights: ")
        c_w = self._model.getCriticNetwork().get_weights()[0]
        cT_w = self._modelTarget.getCriticNetwork().get_weights()[0]
        print ("critic diff: ", c_w - cT_w)

        print ("Actor weights: ")
        a_w = self._model.getActorNetwork().get_weights()[0]
        aT_w = self._modelTarget.getActorNetwork().get_weights()[0]
        print ("Actor diff: ", a_w - aT_w)

    def getNetworkParameters(self):
        params = []
        params.append(copy.deepcopy(self._model.getCriticNetwork().get_weights()))
        params.append(copy.deepcopy(self._model.getActorNetwork().get_weights()))
        params.append(copy.deepcopy(self._modelTarget.getCriticNetwork().get_weights()))
        params.append(copy.deepcopy(self._modelTarget.getActorNetwork().get_weights()))
        return params

    def setNetworkParameters(self, params):
        self._model.getCriticNetwork().set_weights(params[0])
        self._model.getActorNetwork().set_weights(params[1])
        self._modelTarget.getCriticNetwork().set_weights(params[2])
        self._modelTarget.getActorNetwork().set_weights(params[3])

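    # The weight lists returned by getNetworkParameters() are ordered
    # [critic, actor, target critic, target actor]; setNetworkParameters()
    # expects the same order. A hedged sketch of copying parameters between
    # two agent instances (both instances are assumed to already exist):
    # params = source_agent.getNetworkParameters()
    # destination_agent.setNetworkParameters(params)
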
    def trainCritic(self, states, actions, rewards, result_states, falls, G_t=[[0]], p=1.0,
                    updates=1, batch_size=None):
        self.reset()
        if (batch_size is None):
            batch_size_ = states.shape[0]
        else:
            batch_size_ = batch_size

        if ((self._updates % self._weight_update_steps) == 0):
            self.updateTargetModel()
        self._updates += 1
        if ('dont_use_td_learning' in self.getSettings()
                and self.getSettings()['dont_use_td_learning'] == True):
            # y_ = self._value_Target([result_states,0])[0]
            y_ = self._modelTarget.getCriticNetwork().predict(result_states, batch_size=states.shape[0])
            target_ = rewards + ((self._discount_factor * y_))
            target_2 = norm_reward(G_t, self.getRewardBounds()) * (1.0 - self.getSettings()['discount_factor'])
            target = (target_ + target_2) / 2.0
        else:
            # y_ = self._modelTarget.getCriticNetwork().predict(result_states, batch_size=states.shape[0])
            # y_ = self._value_Target([result_states,0])[0]
            y_ = self._modelTarget.getCriticNetwork().predict(result_states, batch_size=states.shape[0])
            # v = self._model.getCriticNetwork().predict(states, batch_size=states.shape[0])
            # target_ = rewards + ((self._discount_factor * y_) * falls)
            target = rewards + ((self._discount_factor * y_))
            # y_ = self._modelTarget.getCriticNetwork().predict(result_states, batch_size=states.shape[0])
            # target_ = rewards + ((self._discount_factor * y_) * falls)
        target = np.array(target, dtype=self._settings['float_type'])
        if ("use_fall_reward_shaping" in self._settings
                and (self._settings["use_fall_reward_shaping"] == True)):
            # print ("Shaping reward", np.concatenate((target_, falls, target_ * falls), axis=1))
            target = target * falls
        # states = np.array(states, dtype=self._settings['float_type'])
        # print ("target type: ", target_.dtype)
        # print ("states type: ", states.dtype)
        """
        v = self._model.getCriticNetwork().predict(states, batch_size=states.shape[0])
        v_ = self._model.getCriticNetwork().predict(result_states, batch_size=states.shape[0])
        y_ = self._modelTarget.getCriticNetwork().predict(states, batch_size=states.shape[0])
        y__ = self._value_Target([states,0])[0]
        v__ = self._value([states,0])[0]
        self.printWeights()
        # print ("Critic Target: ", np.concatenate((v, target_, rewards, y_), axis=1))
        c_error = np.mean(np.mean(np.square(v - target_), axis=1))
        """
        if ('anneal_learning_rate' in self.getSettings()
                and (self.getSettings()['anneal_learning_rate'] == True)):
            K.set_value(self._model.getCriticNetwork().optimizer.lr, np.float32(self.getSettings()['critic_learning_rate']) * p)
            # lr = K.get_value(self._model.getCriticNetwork().optimizer.lr)
            # print ("New critic learning rate: ", lr)
        # print ("critic error: ", np.mean(np.mean(np.square(v - target_), axis=1)))
        # if (c_error < 10.0):
        score = self._model.getCriticNetwork().fit(states, target,
                                                   epochs=updates, batch_size=batch_size_,
                                                   verbose=0,
                                                   # shuffle=True
                                                   # callbacks=[early_stopping],
                                                   )
        loss = score.history['loss'][0]
        # else:
        #     print ("Critic error too high:", c_error)
        #     loss = 0
        # print(" Critic loss: ", loss)

        return loss

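    # trainCritic() fits the critic toward a one-step TD target computed with the
    # *target* network: target = reward + discount_factor * V_target(result_state).
    # Tiny worked example (the numbers are made up): with reward = 0.5,
    # discount_factor = 0.9 and V_target(result_state) = 1.0, the regression
    # target for that sample is 0.5 + 0.9 * 1.0 = 1.4.
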
    def train(self, states, actions, rewards, result_states, falls):
        loss = self.trainCritic(states, actions, rewards, result_states, falls)
        lossActor = self.trainActor(states, actions, rewards, result_states, falls)
        return loss

    def predict(self, state, deterministic_=True, evaluation_=False, p=None, sim_index=None, bootstrapping=False):
        state = norm_state(state, self._state_bounds)
        state = np.array(state, dtype=self._settings['float_type'])
        # if deterministic_:
        # print ("state: ", np.array([state]).shape)
        action_ = scale_action(self._model.getActorNetwork().predict([state], batch_size=1)[:, :self._action_length],
                               self._action_bounds)
        return action_

    def predictWithDropout(self, state, deterministic_=True):
        # states = np.zeros((self._batch_size, self._state_length), dtype=self._settings['float_type'])
        # states[0, ...] = state
        state = np.array(state, dtype=self._settings['float_type'])
        state = norm_state(state, self._state_bounds)
        self._model.setStates(state)
        # action_ = lasagne.layers.get_output(self._model.getActorNetwork(), state, deterministic=deterministic_).mean()
        # action_ = scale_action(self._q_action()[0], self._action_bounds)
        # if deterministic_:
        action_ = scale_action(self._model.getActorNetwork().predict(state, batch_size=1)[:, :self._action_length], self._action_bounds)
        # else:
        #     action_ = scale_action(self._q_action()[0], self._action_bounds)
        # action_ = q_valsActA[0]
        return action_

    def q_value(self, state):
        # states = np.zeros((self._batch_size, self._state_length), dtype=self._settings['float_type'])
        # states[0, ...] = state
        state = norm_state(state, self._state_bounds)
        state = np.array(state, dtype=self._settings['float_type'])
        # return scale_reward(self._q_valTarget(), self.getRewardBounds())[0]
        # print ("State shape: ", state.shape)
        value = scale_reward(self._model.getCriticNetwork().predict(state), self.getRewardBounds()) * (1.0 / (1.0 - self.getSettings()['discount_factor']))
        # value = scale_reward(self._value([state,0])[0], self.getRewardBounds()) * (1.0 / (1.0 - self.getSettings()['discount_factor']))
        # print ("value: ", repr(np.array(value)))
        return value
        # return self._q_val()[0]

    def q_values(self, states):
        states = np.array(states, dtype=self._settings['float_type'])
        # print("states: ", repr(states))
        values = self._model.getCriticNetwork().predict(states)
        # values = self._value([states,0])[0]
        # print ("values: ", repr(np.array(values)))
        return values

    def q_valueWithDropout(self, state):
        # states = np.zeros((self._batch_size, self._state_length), dtype=self._settings['float_type'])
        # states[0, ...] = state
        state = np.array(state, dtype=self._settings['float_type'])
        state = norm_state(state, self._state_bounds)
        self._model.setStates(state)
        return scale_reward(self._q_val_drop(), self.getRewardBounds())

    def saveTo(self, fileName):
        # print(self, "saving model")
        import h5py
        hf = h5py.File(fileName+"_bounds.h5", "w")
        hf.create_dataset('_state_bounds', data=self.getStateBounds())
        hf.create_dataset('_reward_bounds', data=self.getRewardBounds())
        hf.create_dataset('_action_bounds', data=self.getActionBounds())
        # hf.create_dataset('_result_state_bounds', data=self.getResultStateBounds())
        hf.flush()
        hf.close()
        suffix = ".h5"
        ### Save models
        # self._model._actor_train.save(fileName+"_actor_train"+suffix, overwrite=True)
        self._model._actor.save(fileName+"_actor"+suffix, overwrite=True)
        self._model._critic.save(fileName+"_critic"+suffix, overwrite=True)
        if (self._modelTarget is not None):
            self._modelTarget._actor.save(fileName+"_actor_T"+suffix, overwrite=True)
            self._modelTarget._critic.save(fileName+"_critic_T"+suffix, overwrite=True)
        # print ("self._model._actor_train: ", self._model._actor_train)

    def loadFrom(self, fileName):
        from keras.models import load_model
        import h5py
        suffix = ".h5"
        print ("Loading agent: ", fileName)
        # with K.get_session().graph.as_default() as g:
        self._model._actor = load_model(fileName+"_actor"+suffix)
        self._model._critic = load_model(fileName+"_critic"+suffix)
        if (self._modelTarget is not None):
            self._modelTarget._actor = load_model(fileName+"_actor_T"+suffix)
            self._modelTarget._critic = load_model(fileName+"_critic_T"+suffix)

        self.compile()
        # self._model._actor_train = load_model(fileName+"_actor_train"+suffix, custom_objects={'loss': pos_y})
        # self._value = K.function([self._model.getStateSymbolicVariable(), K.learning_phase()], [self.__value])
        # self._value_Target = K.function([self._model.getResultStateSymbolicVariable(), K.learning_phase()], [self.__value_Target])
        hf = h5py.File(fileName+"_bounds.h5", 'r')
        self.setStateBounds(np.array(hf.get('_state_bounds')))
        self.setRewardBounds(np.array(hf.get('_reward_bounds')))
        self.setActionBounds(np.array(hf.get('_action_bounds')))
        print ("critic self.getStateBounds(): ", self.getStateBounds())
        # self._result_state_bounds = np.array(hf.get('_result_state_bounds'))
        hf.close()
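
# Hedged usage sketch of the checkpoint round trip (assumes `agent` is an already
# constructed KERASAlgorithm instance; the path below is made up):
# agent.saveTo("./checkpoints/agent")
#   -> writes agent_actor.h5, agent_critic.h5, agent_actor_T.h5, agent_critic_T.h5
#      and agent_bounds.h5 (state/action/reward bounds)
# agent.loadFrom("./checkpoints/agent")
#   -> reloads the four Keras models, calls self.compile(), and restores the bounds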