LauzHackDays
# Derived from keras-rl
import argparse

import numpy as np
import opensim as osim

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam

from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

from osim.env import ProstheticsEnv
from osim.http.client import Client

# Command line parameters
parser = argparse.ArgumentParser(description='Train or test a neural net motor controller')
parser.add_argument('--train', dest='train', action='store_true', default=True,
                    help='train a new model (default)')
parser.add_argument('--test', dest='train', action='store_false',
                    help='load saved weights and evaluate instead of training')
parser.add_argument('--steps', dest='steps', action='store', default=10000, type=int,
                    help='total number of training steps')
parser.add_argument('--visualize', dest='visualize', action='store_true', default=False,
                    help='render the simulation')
parser.add_argument('--model', dest='model', action='store', default="example.h5f",
                    help='path for saving/loading the model weights')
parser.add_argument('--token', dest='token', action='store', required=False,
                    help='API token for remote grading (optional)')
args = parser.parse_args()

# Load the walking environment
env = ProstheticsEnv(visualize=args.visualize)
env.reset()

# One continuous control per actuator
nb_actions = env.action_space.shape[0]

# Total number of steps in training
nallsteps = args.steps

# Create networks for DDPG
# The actor maps observations to actions; a small fully connected net is enough to start with.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(32))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))  # actions are muscle excitations in [0, 1]
print(actor.summary())

# The critic estimates Q(state, action), so it takes both the action and the observation as input.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
# Exploration noise; its size must match the action dimension (was hardcoded to 19)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=nb_actions)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=1.)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is kept off during training because
# rendering slows it down considerably; training can always be aborted safely with Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=2000, log_interval=1000)
    # After training is done, save the final weights.
    agent.save_weights(args.model, overwrite=True)

# If testing and no token was given, run a local test experiment
if not args.train and not args.token:
    agent.load_weights(args.model)
    # Evaluate the trained policy for 1 episode.
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=500)
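
# Note: Client and --token are imported/accepted above but never used in this paste.
# The branch below is a sketch of the implied submission path, modeled on the public
# osim-rl examples; the grader URL, the env_id value, and the exact Client methods
# (env_create, env_step, env_reset, submit) are assumptions, not part of the original.
if not args.train and args.token:
    agent.load_weights(args.model)
    remote_base = 'http://grader.crowdai.org:1729'  # assumed grader endpoint
    client = Client(remote_base)
    observation = client.env_create(args.token, env_id="ProstheticsEnv")
    # Run the trained policy against the remote environment until the grader stops us
    while True:
        action = agent.forward(np.array(observation))
        [observation, reward, done, info] = client.env_step(action.tolist())
        if done:
            observation = client.env_reset()
            if not observation:
                break
    client.submit()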