Advertisement
Guest User

Untitled

a guest
Sep 25th, 2018
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.15 KB | None | 0 0
import collections
import itertools
import os
import pickle
import threading
import time

import gym
import numpy as np
import tensorflow as tf
from colorama import init
from colorama import Fore, Back, Style

import myenv
  12.  
  13.  
  14. ##################### hyper parameters ####################
  15.  
  16. MAX_EPISODES = 200
  17. MAX_EP_STEPS = 200
  18. LR_A = 0.001 # learning rate for actor
  19. LR_C = 0.002 # learning rate for critic
  20. GAMMA = 0.9 # reward discount
  21. TAU = 0.01 # soft replacement
  22. MEMORY_CAPACITY = 10000
  23. BATCH_SIZE = 32
  24.  
  25. RENDER = True
  26. ENV_NAME = 'myenv-v0'
  27.  
  28. init(convert=True) #Colors Related
  29.  
  30. ############################### DDPG ####################################
  31.  
  32.  
  33. class DDPG(object):
  34. def __init__(self, a_dim, s_dim, a_bound,):
  35. try:
  36. with open('Training_Memory', 'rb') as f:
  37. self.memory = pickle.load(f)
  38.  
  39. except FileNotFoundError:
  40. self.memory = np.zeros((MEMORY_CAPACITY, s_dim * 2 + a_dim + 1), dtype=np.float32)
  41.  
  42. print(self.memory)
  43. self.pointer = 0
  44. self.sess = tf.Session()
  45.  
  46. self.a_dim, self.s_dim, self.a_bound = a_dim, s_dim, a_bound,
  47.  
  48. s = env.reset() #!!!!!!!!!!!!!
  49.  
  50. print(env.shapingsize)
  51. print("printed env.shapingsize")
  52. print(type(env.shapingsize))
  53. print("printed type shapingsize")
  54.  
  55. shapingsize = env.shapingsize
  56.  
  57. shapingsize1 = shapingsize[0]
  58.  
  59. shapingsize2 = shapingsize[1]
  60.  
  61. print(shapingsize1)
  62. print("printed shapingsize1")
  63. print(type(shapingsize1))
  64. print("printed type of new shapingsize1")
  65.  
  66. print(shapingsize2)
  67. print("printed shapingsize2")
  68. print(type(shapingsize2))
  69. print("printed type of new shapingsize2")
  70.  
  71. self.S = tf.placeholder(np.float32, [shapingsize2, shapingsize1], 's')
  72. self.S_ = tf.placeholder(np.float32, [shapingsize2, shapingsize1], 's_')
  73. # self.S = tf.placeholder(tf.float32, [None, s_dim], 's')
  74. # self.S_ = tf.placeholder(tf.float32, [None, s_dim], 's_')
  75. self.R = tf.placeholder(tf.float32, [None, 1], 'r')
  76.  
  77. print(self.S)
  78. print("printed self.S in placeholder")
  79. print(type(self.S))
  80. print("printed type of self.S in placeholder")
  81.  
  82. print(s)
  83. print("Printed s before a build ")
  84.  
  85. print(s.shape)
  86. print("Printed s shape before a build ")
  87.  
  88. self.a = self._build_a(self.S,)
  89. q = self._build_c(self.S, self.a, )
  90. a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor')
  91. c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Critic')
  92. ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) # soft replacement
  93.  
  94. def ema_getter(getter, name, *args, **kwargs):
  95. return ema.average(getter(name, *args, **kwargs))
  96.  
  97. target_update = [ema.apply(a_params), ema.apply(c_params)] # soft update operation
  98. a_ = self._build_a(self.S_, reuse=True, custom_getter=ema_getter) # replaced target parameters
  99. q_ = self._build_c(self.S_, a_, reuse=True, custom_getter=ema_getter)
  100.  
  101. a_loss = - tf.reduce_mean(q) # maximize the q
  102. self.atrain = tf.train.AdamOptimizer(LR_A).minimize(a_loss, var_list=a_params)
  103.  
  104. with tf.control_dependencies(target_update): # soft replacement happened at here
  105. q_target = self.R + GAMMA * q_
  106. td_error = tf.losses.mean_squared_error(labels=q_target, predictions=q)
  107. self.ctrain = tf.train.AdamOptimizer(LR_C).minimize(td_error, var_list=c_params)
  108.  
  109. self.sess.run(tf.global_variables_initializer())
  110.  
  111. def choose_action(self, s):
  112. # s = np.array(s, dtype=object)
  113. print(s)
  114. print("printed state in choose action")
  115. return self.sess.run(self.a, {self.S: s[np.newaxis, :]})[0]
  116.  
  117. def learn(self):
  118. indices = np.random.choice(MEMORY_CAPACITY, size=BATCH_SIZE)
  119. bt = self.memory[indices, :]
  120. bs = bt[:, :self.s_dim]
  121. ba = bt[:, self.s_dim: self.s_dim + self.a_dim]
  122. br = bt[:, -self.s_dim - 1: -self.s_dim]
  123. bs_ = bt[:, -self.s_dim:]
  124.  
  125. self.sess.run(self.atrain, {self.S: bs})
  126. self.sess.run(self.ctrain, {self.S: bs, self.a: ba, self.R: br, self.S_: bs_})
  127.  
  128. def store_transition(self, s, a, r, s_):
  129. # s_ = np.array(s_)
  130. transition = np.hstack((s, a, [r], s_))
  131. index = self.pointer % MEMORY_CAPACITY # replace the old memory with new memory
  132. self.memory[index, :] = transition
  133. with open('Training_Memory', 'wb') as f:
  134. pickle.dump(self.memory, f)
  135. print(self.memory)
  136. self.pointer += 1
  137.  
  138. def _build_a(self, s, reuse=None, custom_getter=None):
  139. trainable = True if reuse is None else False
  140. with tf.variable_scope('Actor', reuse=reuse, custom_getter=custom_getter):
  141.  
  142. s = tf.reshape(s, [1, -1])
  143.  
  144. print(s)
  145. print("Printed s in build a ")
  146.  
  147. print(s.get_shape())
  148. print("Printed s shape in build a ")
  149.  
  150. net = tf.layers.dense(s, 5, activation=tf.nn.relu, name='l1', trainable=trainable) #Neurons
  151. a = tf.layers.dense(net, 3, activation=tf.nn.tanh, name='a', trainable=trainable)
  152. # return tf.multiply(a, self.a_bound, name='scaled_a')
  153. return tf.nn.softmax(a)
  154.  
  155. def _build_c(self, s, a, reuse=None, custom_getter=None):
  156. # s = env.reset()
  157.  
  158. shapingsize = env.shapingsize
  159.  
  160. shapingsize1 = shapingsize[0]
  161.  
  162. shapingsize2 = shapingsize[1]
  163.  
  164. print(shapingsize1)
  165.  
  166. print("Printed shapingsize1 in build c")
  167.  
  168. trainable = True if reuse is None else False
  169. with tf.variable_scope('Critic', reuse=reuse, custom_getter=custom_getter):
  170.  
  171. n_l1 = shapingsize1
  172.  
  173. w1_s = tf.get_variable('w1_s', [shapingsize1 * shapingsize2, n_l1], trainable=trainable)
  174. w1_a = tf.get_variable('w1_a', [3, n_l1], trainable=trainable)
  175.  
  176. print(w1_s)
  177. print("printed w1_s")
  178. print(w1_a)
  179. print("printed w1_a")
  180. print(s)
  181. print("printed s in build c")
  182. print(a)
  183. print("printed a")
  184.  
  185. print(s.get_shape())
  186. print("printed s tf shape in build c")
  187. print(w1_s.get_shape())
  188. print("printed w1 s tf shape")
  189. print(w1_a.get_shape())
  190. print("printed w1 a tf shape")
  191.  
  192. b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
  193.  
  194. print(b1)
  195. print("printed b1")
  196. print(b1.get_shape())
  197. print("printed b1 shape")
  198.  
  199. s = tf.reshape(s, [1, -1])
  200.  
  201. net = tf.nn.relu(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
  202. return tf.layers.dense(net, 1, trainable=trainable) # Q(s,a)
  203.  
  204.  
  205. ############################### training ####################################
  206.  
  207.  
  208. env = gym.make(ENV_NAME)
  209. env = env.unwrapped
  210. env.seed(1)
  211.  
  212. def startpoint():
  213.  
  214. LoadingNumber = 0
  215. print("")
  216. print(Fore.CYAN + "#====================#")
  217. print("")
  218. while True:
  219. print("Starting Main Code...")
  220. print("")
  221. LoadingNumber = LoadingNumber + 1
  222. time.sleep(5)
  223. if LoadingNumber > 4:
  224. break
  225. print(Fore.CYAN + "#====================#")
  226. print("")
  227. env = gym.make(ENV_NAME)
  228. env = env.unwrapped
  229. env.seed(1)
  230.  
  231.  
  232. s = env.reset()
  233.  
  234. print(env.shapingsize)
  235. print("printed env.shapingsize")
  236. print(type(env.shapingsize))
  237. print("printed type shapingsize")
  238.  
  239. shapingsize = env.shapingsize
  240.  
  241. shapingsize1 = shapingsize[0]
  242.  
  243. shapingsize2 = shapingsize[1]
  244.  
  245. print(shapingsize1)
  246. print("printed shapingsize1")
  247. print(type(shapingsize1))
  248. print("printed type of new shapingsize1")
  249.  
  250. print(shapingsize2)
  251. print("printed shapingsize2")
  252. print(type(shapingsize2))
  253. print("printed type of new shapingsize2")
  254.  
  255. s_dim = shapingsize2
  256. a_dim = 3
  257. a_bound = 3
  258. a_bound = np.array(a_bound)
  259. # s = np.array(s, dtype=object)
  260.  
  261. print(s)
  262. print("printed s in the begin")
  263.  
  264. print(s_dim)
  265. print("printed s_dim in the begin")
  266.  
  267. ddpg = DDPG(a_dim, s_dim, a_bound)
  268.  
  269. var = 3 # control exploration
  270. t1 = time.time()
  271.  
  272. CurrentEpisode = 0
  273.  
  274. for i in range(MAX_EPISODES):
  275. # mi = mi[mi]
  276. CurrentEpisode += 1
  277. print("Currently in Episode : " + str(CurrentEpisode))
  278. # s = np.array(s, dtype=object)
  279. ep_reward = 0
  280. for j in range(MAX_EP_STEPS):
  281. # if RENDER:
  282. # env.render()
  283.  
  284. # Add exploration noise
  285. a = ddpg.choose_action(s)
  286. s_, r, done, info = env.step(a)
  287.  
  288. ddpg.store_transition(s, a, r / 10, s_)
  289.  
  290. if ddpg.pointer > MEMORY_CAPACITY:
  291. var *= .9995 # decay the action randomness
  292. ddpg.learn()
  293.  
  294. print(s)
  295. print("printed s in loop")
  296.  
  297. print(s.getshape())
  298. print("printed s shape in loop")
  299.  
  300. s = s_
  301. ep_reward += r
  302.  
  303. print(s)
  304. print("printed s in loop after s thing")
  305.  
  306. print(s.getshape())
  307. print("printed s shape in loop after s thing")
  308.  
  309. if env.StopEpisode:
  310. print("")
  311. print("Next Episode")
  312. print("")
  313. break
  314. # if j == MAX_EP_STEPS-1:
  315. # print('Episode:', i, ' Reward: %i' % int(ep_reward), 'Explore: %.2f' % var, )
  316. # # if ep_reward > -300:RENDER = True
  317. # break
  318.  
  319.  
  320. #print('Running time: ', time.time() - t1)
  321.  
  322. startpoint()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement