#!/usr/bin/env python3
import os
from baselines.common import tf_util as U
from baselines import logger
import gymfc
import argparse
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import gym
import math

def train(num_timesteps, seed, model_path=None, env_id=None):
    """Train a PPO1 policy (MLP with two hidden layers of 64 units) on env_id."""
    # Note: seed is accepted for interface compatibility but is not used here.
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)

    env = gym.make(env_id)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
                             max_timesteps=num_timesteps,
                             timesteps_per_actorbatch=2048,
                             clip_param=0.2, entcoeff=0.0,
                             optim_epochs=10,
                             optim_stepsize=3e-4,
                             optim_batchsize=64,
                             gamma=0.99,
                             lam=0.95,
                             schedule='linear',
                             )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi

class RewScale(gym.RewardWrapper):
    """Reward wrapper that multiplies every reward by a constant scale factor."""
    def __init__(self, env, scale):
        gym.RewardWrapper.__init__(self, env)
        self.scale = scale
    def reward(self, r):
        return r * self.scale

def plot_step_response(desired, actual,
                       end=1., title=None,
                       step_size=0.001, threshold_percent=0.1):
    """Plot desired vs. actual roll, pitch and yaw rates with an error band.

    Args:
        threshold_percent (float): width of the error band, expressed as a
            fraction of the desired value (e.g. 0.1 draws dashed lines at
            +/-10% of the setpoint).
    """

    #actual = actual[:,:end,:]
    end_time = len(desired) * step_size
    t = np.arange(0, end_time, step_size)

    #desired = desired[:end]
    threshold = threshold_percent * desired

    plot_min = -math.radians(350)
    plot_max = math.radians(350)

    subplot_index = 3
    num_subplots = 3

    f, ax = plt.subplots(num_subplots, sharex=True, sharey=False)
    f.set_size_inches(10, 5)
    if title:
        plt.suptitle(title)
    ax[0].set_xlim([0, end_time])
    res_linewidth = 2
    linestyles = ["c", "m", "b", "g"]
    reflinestyle = "k--"
    error_linestyle = "r--"

    # Axis labels are the same for every run
    ax[0].set_ylabel("Roll (rad/s)")
    ax[1].set_ylabel("Pitch (rad/s)")
    ax[2].set_ylabel("Yaw (rad/s)")

    ax[-1].set_xlabel("Time (s)")

    # ROLL
    # Highlight the starting x axis
    ax[0].axhline(0, color="#AAAAAA")
    ax[0].plot(t, desired[:,0], reflinestyle)
    ax[0].plot(t, desired[:,0] - threshold[:,0], error_linestyle, alpha=0.5)
    ax[0].plot(t, desired[:,0] + threshold[:,0], error_linestyle, alpha=0.5)

    r = actual[:,0]
    ax[0].plot(t[:len(r)], r, linewidth=res_linewidth)

    ax[0].grid(True)

    # PITCH
    ax[1].axhline(0, color="#AAAAAA")
    ax[1].plot(t, desired[:,1], reflinestyle)
    ax[1].plot(t, desired[:,1] - threshold[:,1], error_linestyle, alpha=0.5)
    ax[1].plot(t, desired[:,1] + threshold[:,1], error_linestyle, alpha=0.5)
    p = actual[:,1]
    ax[1].plot(t[:len(p)], p, linewidth=res_linewidth)
    ax[1].grid(True)

    # YAW
    ax[2].axhline(0, color="#AAAAAA")
    ax[2].plot(t, desired[:,2], reflinestyle)
    ax[2].plot(t, desired[:,2] - threshold[:,2], error_linestyle, alpha=0.5)
    ax[2].plot(t, desired[:,2] + threshold[:,2], error_linestyle, alpha=0.5)
    y = actual[:,2]
    ax[2].plot(t[:len(y)], y, linewidth=res_linewidth)
    ax[2].grid(True)

    plt.savefig("gymfc-ppo-step-response.pdf")

def main():
    parser = argparse.ArgumentParser()
    logger.configure()
    parser.add_argument('--env', type=str)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.add_argument('--play', action="store_true", default=False)
    parser.add_argument('--num-timesteps', type=int, default=int(1e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed,
              model_path=args.model_path, env_id=args.env)
    else:
        print("Making env=", args.env)
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed, env_id=args.env)
        U.load_state(args.model_path)

        # Roll out one episode with the deterministic policy and record the
        # desired vs. actual angular velocities for the step-response plot.
        env = gym.make(args.env)
        ob = env.reset()
        actuals = []
        desireds = []
        while True:
            desired = env.omega_target
            actual = env.omega_actual
            actuals.append(actual)
            desireds.append(desired)
            print("sp=", desired, " rate=", actual)
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            if done:
                break
        plot_step_response(np.array(desireds), np.array(actuals))


if __name__ == '__main__':
    main()
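
# Example invocation (a hypothetical sketch: the script filename and the GymFC
# environment id below are assumptions, substitute whichever environment your
# gymfc installation registers; the flags match the argparse options above).
#
#   Train a policy and save it under --model-path:
#     python3 ppo_baselines_train.py --env AttFC_GyroErr-MotorVel_M4_Ep-v0 \
#         --num-timesteps 1000000 --model-path /tmp/gymfc_ppo_policy
#
#   Replay the saved policy and write gymfc-ppo-step-response.pdf:
#     python3 ppo_baselines_train.py --env AttFC_GyroErr-MotorVel_M4_Ep-v0 \
#         --model-path /tmp/gymfc_ppo_policy --play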