import gym
import MDP

env = gym.make('CartPole-v0')
A = env.action_space.n
# Upper bound on the number of discretized states (20 bins per dimension).
S = env.observation_space.shape[0] * 20 * 20 * 20 * 20

# Two learners: a coarse one ("big") covering the full observation range,
# and a fine-grained one ("small") for observations near the centre.
expBig = MDP.SparseExperience(S, A)
modelBig = MDP.SparseRLModel(expBig, 0.9)

expSmall = MDP.SparseExperience(S, A)
modelSmall = MDP.SparseRLModel(expSmall, 0.9)
def isStateBig(o, thresh):
    # True if any observation component lies outside [-thresh, thresh].
    for i in range(len(o)):
        if o[i] > thresh or o[i] < -thresh:
            return True
    return False

def observationToState(o, thresh):
    # Clamp each component to [-thresh, thresh], discretize it into 20 bins
    # and pack the per-dimension bins into a single state index.
    s = 0
    for i in range(len(o)):
        s *= 20
        ox = (min(thresh, max(-thresh, o[i])) + thresh) / (2.0 * thresh)
        s += min(19, int(ox * 20))
    return s
# Prioritized sweeping solvers with greedy policies over their Q-functions.
solverBig = MDP.PrioritizedSweepingSparseRLModel(modelBig)
policyBig = MDP.QGreedyPolicy(solverBig.getQFunction())

solverSmall = MDP.PrioritizedSweepingSparseRLModel(modelSmall)
policySmall = MDP.QGreedyPolicy(solverSmall.getQFunction())

# Thresholds that separate the coarse ("big") and fine ("small") regions.
threshBig = 1.0
threshSmall = 0.2
for i_episode in xrange(100):
    o = env.reset()

    for t in xrange(300):
        env.render()

        # Discretize the current observation under both thresholds.
        sBig = observationToState(o, threshBig)
        sSmall = observationToState(o, threshSmall)
        stateBig = isStateBig(o, threshSmall)

        # Act with the coarse policy outside the small region, otherwise
        # with the fine-grained one.
        if stateBig:
            a = policyBig.sampleAction(sBig)
        else:
            a = policySmall.sampleAction(sSmall)

        o1, rew, done, info = env.step(a)

        s1Big = observationToState(o1, threshBig)
        s1Small = observationToState(o1, threshSmall)
        state1Big = isStateBig(o1, threshSmall)

        if done:
            env.render()
            print o1
            # Reward the terminal step only if the pole ended up near the
            # centre or the episode reached the 200-step goal.
            if state1Big == False or t >= 199:
                rew = 10
            else:
                rew = -10

            # The small model only learns from transitions that start inside
            # the small region; the big model learns from every transition.
            if stateBig == False:
                expSmall.record(sSmall, a, s1Small, rew)
                modelSmall.sync(sSmall, a, s1Small)
                solverSmall.stepUpdateQ(sSmall, a)
                solverSmall.batchUpdateQ()

            expBig.record(sBig, a, s1Big, rew)
            modelBig.sync(sBig, a, s1Big)
            solverBig.stepUpdateQ(sBig, a)
            solverBig.batchUpdateQ()

            print state1Big, sBig, a, s1Big, rew
            print "Episode {} finished after {} timesteps".format(i_episode, t+1)
            break

        # Shape the reward when the agent crosses between the two regions.
        if stateBig == False and state1Big:
            rew = -10
        elif stateBig == True and state1Big == False:
            rew = 10

        if stateBig == False:
            expSmall.record(sSmall, a, s1Small, rew)
            modelSmall.sync(sSmall, a, s1Small)
            solverSmall.stepUpdateQ(sSmall, a)
            solverSmall.batchUpdateQ()

        expBig.record(sBig, a, s1Big, rew)
        modelBig.sync(sBig, a, s1Big)
        solverBig.stepUpdateQ(sBig, a)
        solverBig.batchUpdateQ()

        o = o1
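
A quick way to sanity-check the two helpers is to call them on a hand-picked observation. This is only a minimal sketch: the observation values are made up, and it assumes isStateBig and observationToState from above are already defined in the same session (no gym or MDP bindings needed).

# Arbitrary example observation for illustration only.
o = [0.03, -0.15, 0.18, -0.9]

print isStateBig(o, 0.2)              # True: the last component leaves [-0.2, 0.2]
print observationToState(o, 1.0)      # coarse index: 20 bins per dimension over [-1, 1]
print observationToState(o, 0.2)      # fine index: same grid, components clamped to [-0.2, 0.2]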