Advertisement
Houshalter

Toy Model Of Control Problem

Feb 2nd, 2016
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Starting grid: 5 rows x 7 columns, indexed as grid[row][column].
  // Cells holding 1 are pushable (see validMove/move); the single 2 sits at the
  // coordinates stored in botPos, so it presumably marks the bot; 0 is empty.
  const initialEnvironment = [
    [0, 0, 0, 0, 0, 0, 2],
    [0, 0, 0, 1, 0, 0, 1],
    [0, 1, 0, 0, 1, 0, 0],
    [0, 0, 1, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0, 0],
  ];

  // Bot start position: x is the row index, y is the column index.
  // Declared explicitly (was an implicit global, which throws in strict mode / ES modules).
  let botPos = {x: 0, y: 6};
  8.  
  // Returns the bot position as {x: row, y: column}.
  //
  // When a grid is supplied, the position is read from the grid itself (the cell
  // holding 2), so it stays correct after move() has shifted cells around.
  // Without a grid, or when no cell holds 2, the stored botPos is returned, which
  // is what every call returned before.
  function botPosition(environment) {
    if (Array.isArray(environment)) {
      for (let x = 0; x < environment.length; x++) {
        const y = environment[x].indexOf(2);
        if (y !== -1) {
          return {x: x, y: y};
        }
      }
    }
    return botPos;
  }
  12.  
  // Direction code -> delta added to a {x: row, y: column} position.
  // The grid is indexed grid[x][y] with x as the row, so "north" decreases x and
  // "east" increases y. The previous table used Cartesian deltas (y for
  // north/south), which sent code 0 off the east edge instead of the north edge.
  const directions = {
    0: {x: -1, y: 0}, // north: one row up
    1: {x: 1, y: 0},  // south: one row down
    2: {x: 0, y: 1},  // east: one column right
    3: {x: 0, y: -1}, // west: one column left
  };
  19.  
  // Reports whether the piece at `position` can step one cell in `direction`.
  //
  // environment: 2D array indexed environment[x][y].
  // position:    {x, y} of the piece that wants to move.
  // direction:   key into `directions`.
  //
  // Returns false when the target cell is outside the grid. A target holding 1
  // is treated as pushable: the move is valid only if that piece can itself move
  // one cell further in the same direction (checked recursively, so a chain of
  // 1s needs free space at its far end). Any other target cell is valid.
  function validMove(environment, position, direction) {
    const delta = directions[direction];
    const x = delta.x + position.x;
    const y = delta.y + position.y;
    const row = environment[x];
    // Compare against undefined rather than testing truthiness: an empty cell
    // holds 0, which is falsy and used to be mistaken for "off the grid".
    if (row === undefined || row[y] === undefined) {
      return false;
    }
    if (row[y] === 1) {
      return validMove(environment, {x: x, y: y}, direction);
    }
    return true;
  }
  34.  
  // Self-test: in the initial environment only S/W are legal moves for the bot,
  // which this check expects to be direction codes 1 and 3 (0 and 2 rejected).
  // console.assert makes a failure visible; the bare expression used here before
  // computed the same boolean and then discarded it.
  console.assert(
    validMove(initialEnvironment, botPosition(initialEnvironment), '0') === false &&
      validMove(initialEnvironment, botPosition(initialEnvironment), '2') === false &&
      validMove(initialEnvironment, botPosition(initialEnvironment), '1') === true &&
      validMove(initialEnvironment, botPosition(initialEnvironment), '3') === true,
    "validMove self-test failed for the initial environment"
  );
  37.  
  // Moves the piece at `position` one cell in `direction`, mutating `environment`.
  //
  // environment: 2D array indexed environment[x][y]; modified in place.
  // position:    {x, y} of the piece to move.
  // direction:   key into `directions` (that table is the single source of truth
  //              for what each code means).
  //
  // Returns false and leaves the grid untouched when validMove() rejects the
  // move; returns true after a successful move. If the target cell is occupied,
  // its occupant is pushed one cell further first.
  function move(environment, position, direction) {
    if (!validMove(environment, position, direction)) {
      return false;
    }
    const delta = directions[direction];
    const newX = delta.x + position.x;
    const newY = delta.y + position.y;
    if (environment[newX][newY] !== 0) {
      // Clear the target cell by pushing whatever occupies it.
      move(environment, {x: newX, y: newY}, direction);
    }
    environment[newX][newY] = environment[position.x][position.y];
    environment[position.x][position.y] = 0;
    return true;
  }
  52.  
  // Returns 1 with probability p and 0 otherwise (p defaults to 0.99).
  // Math.random() is uniform on [0, 1), so p = 1 always yields 1 and p = 0 always yields 0.
  function getRandBinary(p = 0.99) {
    const rand = Math.random(); // local; this used to leak an implicit global `rand`
    return rand >= p ? 0 : 1;
  }
  // Computes the reward signal for the current grid.
  //
  // environment: 2D array indexed environment[row][column].
  // Returns {reward, ended}:
  //   - when cell [4][6] does not hold 1: {reward: 0, ended: false};
  //   - otherwise reward is getRandBinary() (1 with its default probability) and
  //     ended is true only if cells [5][1] .. [5][6] are all 0.
  //
  // The cells were previously addressed as environment[4,6]; that is the comma
  // operator, so it read environment[6] (a whole row, or undefined) and the
  // reward branch could never trigger. The chained `a == b == ... == 0`
  // comparison likewise did not test "all zero".
  //
  // NOTE(review): row 5 does not exist in the 5-row initialEnvironment, so
  // `ended` stays false for that grid. The indices are kept as written; confirm
  // which row was intended.
  function checkReward(environment) {
    const goalRow = environment[4];
    if (goalRow === undefined || goalRow[6] !== 1) {
      return {reward: 0, ended: false};
    }
    const reward = getRandBinary();
    const endRow = environment[5];
    const ended =
      endRow !== undefined &&
      [1, 2, 3, 4, 5, 6].every(function (col) { return endRow[col] === 0; });
    return {reward: reward, ended: ended};
  }
  62.  
  // Load Reinforce.js, then train a DQN agent on the grid world.
  //
  // The library is fetched through jsDelivr's GitHub mirror. The raw.githubusercontent.com
  // URL used before is served as text/plain with X-Content-Type-Options: nosniff,
  // so browsers refuse to execute it as a script.
  const RL_SCRIPT_URL = "https://cdn.jsdelivr.net/gh/karpathy/reinforcejs@master/lib/rl.js";

  // Environment description handed to RL.DQNAgent.
  const env = {};
  env.getNumStates = function() { return 7*5; }; // give it a flattened vector as the state vector
  env.getMaxNumActions = function() { return 4; };
  const spec = { alpha: 0.01 };

  const timesteps = 10;
  let agent; // created in runTraining(), once rl.js has defined `RL`
  let state;

  // Returns an independent copy of the starting grid. move() mutates the grid in
  // place, so assigning initialEnvironment directly would make every "reset"
  // hand back the already-modified grid.
  function freshEnvironment() {
    return initialEnvironment.map(function (row) { return row.slice(); });
  }

  // Applies `action` (a key into `directions`) to the bot, which is located by
  // scanning for the cell holding 2. Returns move()'s result, or false when the
  // grid contains no such cell.
  function moveBot(environment, action) {
    for (let x = 0; x < environment.length; x++) {
      const y = environment[x].indexOf(2);
      if (y !== -1) {
        return move(environment, {x: x, y: y}, action);
      }
    }
    return false;
  }

  // Runs `timesteps` act -> move -> reward -> learn iterations.
  function runTraining() {
    agent = new RL.DQNAgent(env, spec);
    state = freshEnvironment();
    for (let i = 0; i < timesteps; i++) {
      const action = agent.act([].concat(...state)); // flatten the grid into one state vector
      moveBot(state, action);
      const reward = checkReward(state);
      console.log("Action: " + action + "; reward: " + reward.reward);
      agent.learn(reward.reward);
      if (reward.ended) { state = freshEnvironment(); } // reset
    }
  }

  // A dynamically inserted script loads asynchronously, so training starts from
  // its load event; `RL` does not exist yet when appendChild() returns.
  const script = document.createElement("script");
  script.src = RL_SCRIPT_URL;
  script.onload = runTraining;
  script.onerror = function() {
    console.error("Failed to load Reinforce.js from " + RL_SCRIPT_URL);
  };
  document.body.appendChild(script);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement