Advertisement
Guest User

Untitled

a guest
Aug 28th, 2015
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.84 KB | None | 0 0
  1. //Initiate S
  2. node.state = start;
  3. myPath.add(node.state);
  4.  
  5. //Given the state select action and get statePrime
  6. //Beware that action & statePrime are the same
  7. statePrime = myActions.epsilonGreedy(node.state, 0.1f, qMatrix);
  8.  
  9. while(node.state != goal){
  10.  
  11. actionPrime = myActions.epsilonGreedy(statePrime, 0.1f, qMatrix);
  12.  
  13. //- - - - - - - - - - Cliff - - - - - - - - - - - - -
  14. if(rMatrix[node.state][statePrime] == -100){
  15. //Walked into the cliff!
  16. //Update Q Matrix
  17. myQ = qMatrix[statePrime][actionPrime];
  18.  
  19. qMatrix[node.state][statePrime] = alpha*(rMatrix[node.state][statePrime] + myQ)+ (1-alpha)*qMatrix[node.state][statePrime];
  20.  
  21. //Agent is returned to the start
  22. node.state = start;
  23. statePrime = myActions.epsilonGreedy(node.state, 0.1f, qMatrix);
  24.  
  25. myPath.add(node.state);
  26. }
  27.  
  28. else{
  29. //No cliff, continue
  30. myReward.add(rMatrix[node.state][statePrime]);
  31. //Update Q Matrix
  32. myQ = qMatrix[statePrime][actionPrime];
  33.  
  34. qMatrix[node.state][statePrime] = alpha*(rMatrix[node.state][statePrime] + myQ)+ (1-alpha)*qMatrix[node.state][statePrime];
  35.  
  36. node.state = statePrime;
  37. statePrime = actionPrime;
  38.  
  39. myPath.add(node.state);
  40. }
  41.  
  42. public Integer epsilonGreedy(Integer state, Float epsilon, Float[][] qMatrix){
  43. Random random = new Random();
  44. Float value = random.nextFloat();
  45. Integer statePrime = 0;
  46.  
  47. if(value < epsilon){
  48. myNode.state = state;
  49. statePrime = random(myNode.neighbors());
  50. }
  51. else{
  52. statePrime = greedy(state, qMatrix);
  53. }
  54. return statePrime;
  55. }
  56.  
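  57. Naming: SARSA's (state, action) pair is (state, statePrime) in the code, because an action is identified by the state it leads to.
  58. Likewise, SARSA's (state', action') pair is (statePrime, actionPrime) in the code.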
  59.  
  60. 000000000000
  61. 111111111111
  62. 100000000001
  63. 100000000001
  64.  
  65. 111111111111
  66. 100000000001
  67. 100000000001
  68. 100000000001
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement