Advertisement
Guest User

Untitled

a guest
Jul 30th, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.74 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import argparse
  4. import gym
  5. import numpy as np
  6.  
  7.  
  8. def run_episode(env, weights, time_limit=2000, submit=False):
  9. observation = env.reset()
  10. episode_reward = 0
  11. for t in range(time_limit):
  12. if not submit:
  13. env.render()
  14. prediction = np.matmul(weights, observation)
  15. action = 0 if np.sign(np.dot(weights, observation)) <= 0 else 1
  16. observation, reward, done, info = env.step(action)
  17. episode_reward += reward
  18. if done:
  19. break
  20. print('Episode reward: {}'.format(episode_reward))
  21. return episode_reward
  22.  
  23.  
  24. def train(env, episodes_limit=3000, step_factor=.5, submit=False):
  25. num_spaces = 4
  26. weights = np.random.rand(num_spaces) * 5
  27. best_result = 0
  28.  
  29. # Train our linear model.
  30. for _ in range(episodes_limit):
  31. noise = np.random.rand(num_spaces) * step_factor
  32. result = run_episode(env, weights + noise, submit=submit)
  33. if result > best_result:
  34. weights, best_result = weights + noise, result
  35.  
  36. return best_result, weights
  37.  
  38. if __name__ == '__main__':
  39. parser = argparse.ArgumentParser()
  40. parser.add_argument('--submit', action='store_true', help='submit results')
  41. args = parser.parse_args()
  42. env = gym.make('CartPole-v0')
  43. print('Training our model')
  44. best_result, weights = train(env, submit=args.submit)
  45. if args.submit:
  46. env.monitor.start('cartpole-experiment/', force=True)
  47. print(best_result)
  48. if args.submit:
  49. print('Running tries for submission')
  50. submission_tries = 100
  51. for _ in range(submission_tries):
  52. run_episode(env, weights, submit=args.submit)
  53. env.monitor.close()
  54. gym.upload('cartpole-experiment/', api_key='sk_XJqn2jHQ5SAp2XuqoAgew')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement