Advertisement
Guest User

Untitled

a guest
May 23rd, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.90 KB | None | 0 0
  1. 'Esse arquivo gera alguns stats dos algoritmos'
  2. import numpy as np
  3. import gym
  4. # from main import random_search
  5. import os
  6.  
  def run_episode(env, params, max_reward):
      """Play a single episode with a linear policy and return the summed reward.

      The environment is reset, then stepped at most ``max_reward`` times; the
      loop ends early as soon as ``env.step`` reports the episode as done.
      On every step the environment is rendered, and the action is 0 when
      ``np.matmul(params, observation)`` is negative and 1 otherwise.
      """
      obs = env.reset()
      accumulated = 0
      for _step in range(max_reward):
          # Render every step so the policy can be watched while it runs.
          env.render()
          projection = np.matmul(params, obs)
          if projection < 0:
              chosen_action = 0
          else:
              chosen_action = 1
          obs, step_reward, finished, _info = env.step(chosen_action)
          accumulated += step_reward
          if finished:
              break
      return accumulated
  19.  
  def random_search(env, max_reward, streak_counter, max_episodes=30000):
      """Sample random linear-policy weights until one solves the task repeatedly.

      While no streak is active, a fresh weight vector of 4 values drawn
      uniformly from [-1, 1) is tried each episode. Once a vector reaches
      ``max_reward`` it is replayed on the following episodes; every replay
      that reaches ``max_reward`` again extends the streak, and any episode
      that falls short resets the streak so sampling resumes.

      Args:
          env: Environment handed to ``run_episode``.
          max_reward: Per-episode step budget and the reward an episode must
              reach to count as solved.
          streak_counter: The search stops once the streak exceeds this value.
          max_episodes: Upper bound on the number of episodes to run.

      Returns:
          The index of the last episode that extended a streak, or 0 if no
          episode ever did.
      """
      best_params = None
      # Seed the acceptance bar from the solve threshold instead of a fixed
      # 200, so a threshold below 200 can still be met.
      best_reward = max_reward
      streak = 0
      episode_counter = 0
      for i_episode in range(max_episodes):
          print(streak)
          if streak == 0:
              # 4 weights, one per entry of the observation vector
              # (assumes a 4-value observation such as CartPole's).
              parameters = np.random.rand(4) * 2 - 1
          else:
              # A streak is running: keep replaying the weights that started it.
              parameters = best_params

          reward = run_episode(env, parameters, max_reward)

          # best_reward never drops below max_reward, so reaching it means
          # the episode counts as solved.
          if reward >= best_reward:
              best_reward = reward
              best_params = parameters
              streak += 1
              episode_counter = i_episode

          if reward < max_reward:
              streak = 0

          if streak > streak_counter:
              break

      return episode_counter
  53.  
  # Create the 'gym/out' directory if it is missing. `out` is a non-empty
  # literal, so no fallback path is needed.
  # NOTE(review): nothing below reads `out`; the monitor writes to `directory`.
  # Confirm whether this directory is still wanted.
  out = 'gym/out'
  if not os.path.exists(out):
      os.makedirs(out)

  directory = "gym-out/"
  env = gym.make("CartPole-v1")
  try:
      # Record into `directory`, overwriting earlier results (force=True) and
      # capturing video only for episode ids that are multiples of 10000.
      env = gym.wrappers.Monitor(
          env,
          directory,
          force=True,
          video_callable=lambda episode_id: episode_id % 10000 == 0,
      )
      random_search(env, 200, 200)
  finally:
      # Always release the environment, even if the search raises.
      env.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement