// Toy Model Of Control Problem

// Starting grid: 5 rows of 7 cells, addressed as initialEnvironment[x][y]
// (x picks the row array, y the cell inside it).
// Cell values: 0 = empty, 1 = box that can be pushed,
// 2 = presumably the bot (it sits at [0][6], the initial bot position).
const initialEnvironment = [
    [0, 0, 0, 0, 0, 0, 2],
    [0, 0, 0, 1, 0, 0, 1],
    [0, 1, 0, 0, 1, 0, 0],
    [0, 0, 1, 0, 0, 1, 0],
    [0, 0, 0, 0, 0, 0, 0],
];

// Current bot coordinates. x is the first grid index and y the second,
// i.e. the bot's cell is environment[botPos.x][botPos.y].
// Declared explicitly: a bare `botPos = ...` is an implicit global and a
// ReferenceError in strict mode / ES modules.
let botPos = {x: 0, y: 6};

/**
 * Returns the bot's current position.
 *
 * Takes no parameters; any argument a caller passes (for example an
 * environment grid) is ignored.
 *
 * @returns {{x: number, y: number}} the shared botPos object itself, not a copy
 */
function botPosition() {
    return botPos;
}

// Coordinate deltas keyed by direction code; numeric and string keys both work
// (directions[3] and directions['3'] are the same entry).
// Codes 0/1 step y by +1/-1, codes 2/3 step x by -1/+1.
// NOTE(review): 2 and 3 used to be the other way round. The sanity check that
// follows validMove expects '2' to be blocked and '3' to be allowed from x = 0,
// which is only possible when 2 decreases x and 3 increases it.
const directions = {
    0: {x: 0, y: 1},
    1: {x: 0, y: -1},
    2: {x: -1, y: 0},
    3: {x: 1, y: 0},
};

/**
 * Reports whether the piece at `position` may move one cell in `direction`.
 *
 * The move is legal when the target cell lies inside the grid and is either
 * not a box, or is a box (value 1) that can itself be pushed one cell further
 * in the same direction. The grid is only read, never modified.
 *
 * @param {number[][]} environment grid addressed as environment[x][y]
 * @param {{x: number, y: number}} position cell the moving piece occupies
 * @param {number|string} direction key into `directions`
 * @returns {boolean} true when the move is legal; false when it leaves the
 *     grid, is blocked, or `direction` is not a known code
 */
function validMove(environment, position, direction) {
    const delta = directions[direction];
    if (delta === undefined) {
        return false;
    }
    const x = position.x + delta.x;
    const y = position.y + delta.y;
    const row = environment[x];
    // Test for undefined instead of truthiness: an empty cell holds 0, which
    // is falsy but still a perfectly valid in-bounds target.
    if (row === undefined || row[y] === undefined) {
        return false;
    }
    if (row[y] === 1) {
        // A box is in the way; the move works only if the box can be pushed.
        return validMove(environment, {x: x, y: y}, direction);
    }
    return true;
}

// test suite: from the bot's starting position in the initial environment,
// directions '0' and '2' must be rejected while '1' and '3' must be accepted.
// Evaluates to true only when every expectation holds (stops at the first miss).
[['0', false], ['2', false], ['1', true], ['3', true]].every(function (expectation) {
    return validMove(initialEnvironment, botPosition(initialEnvironment), expectation[0]) == expectation[1];
})

/**
 * Moves the piece at `position` one cell in `direction`, pushing whatever
 * occupies the target cell one cell further first. Mutates `environment` in
 * place; it does not touch the global bot position.
 *
 * Direction codes: 0=North, 1=South, 2=West, 3=East.
 *
 * @param {number[][]} environment grid addressed as environment[x][y]
 * @param {{x: number, y: number}} position cell of the piece to move
 * @param {number|string} direction key into `directions`
 * @returns {false|undefined} false when the move is illegal (grid unchanged),
 *     otherwise undefined
 */
function move(environment, position, direction) {
    if (!validMove(environment, position, direction)) {
        return false;
    }
    const delta = directions[direction];
    const newX = position.x + delta.x;
    const newY = position.y + delta.y;
    if (environment[newX][newY] !== 0) {
        // Clear the target cell by shifting its occupant along; if that fails,
        // stop here rather than overwriting the occupant.
        if (move(environment, {x: newX, y: newY}, direction) === false) {
            return false;
        }
    }
    environment[newX][newY] = environment[position.x][position.y];
    environment[position.x][position.y] = 0;
}

/**
 * Draws a random 0/1 value.
 *
 * @param {number} [p=0.99] probability of returning 1
 * @returns {number} 1 when Math.random() < p, otherwise 0
 */
function getRandBinary(p = 0.99) {
    const rand = Math.random();
    return rand >= p ? 0 : 1;
}

/**
 * Computes the reward signal for the current grid.
 *
 * While cell [4][6] holds a 1, the reward is a getRandBinary() draw and
 * `ended` tells whether cells [5][1] through [5][6] are all 0. In every other
 * case the result is {reward: 0, ended: false}.
 *
 * @param {number[][]} environment grid addressed as environment[x][y]
 * @returns {{reward: number, ended: boolean}}
 */
function checkReward(environment) {
    // Two bracket pairs are required: `environment[4,6]` is the comma operator
    // and would read environment[6] instead of the cell at [4][6].
    const goalRow = environment[4];
    if (goalRow === undefined || goalRow[6] !== 1) {
        return {reward: 0, ended: false};
    }
    const reward = getRandBinary();
    // Each cell is compared with 0 on its own; a chained `a == b == c == 0`
    // compares the boolean result of the previous comparison, not the cells.
    // NOTE(review): row index 5 is outside a 5-row grid, so `ended` stays false
    // there. The intended cells may be in row 4 -- confirm the indices.
    const watchedRow = environment[5];
    const ended = watchedRow !== undefined &&
        [1, 2, 3, 4, 5, 6].every((col) => watchedRow[col] === 0);
    return {reward: reward, ended: ended};
}

// load Reinforce.js
// NOTE(review): raw.githubusercontent.com is believed to serve files as
// text/plain with `X-Content-Type-Options: nosniff`, which browsers refuse to
// execute as a script -- confirm, and point this at a CDN or a self-hosted
// copy of rl.js if so.
const script = document.createElement("script");
script.src = "https://raw.githubusercontent.com/karpathy/reinforcejs/master/lib/rl.js";
// A dynamically inserted script loads asynchronously, so the RL global only
// exists once `load` has fired; training must not start before that.
script.onload = runTraining;
script.onerror = function () {
    console.error("Failed to load " + script.src);
};
document.body.appendChild(script);

const env = {};
env.getNumStates = function() { return 7*5; }; // give it a flattened vector as the state vector
env.getMaxNumActions = function() { return 4; };
const spec = { alpha: 0.01 };
let agent; // created by runTraining once RL is available

const timesteps = 10;
let state; // working copy of the grid, owned by runTraining

/**
 * Creates the DQN agent and runs `timesteps` act / move / learn iterations.
 * Invoked from the script element's load handler.
 * moveBot is not defined in the visible part of this file; it is called as
 * moveBot(state, action).
 */
function runTraining() {
    // Copy the rows so moves never mutate initialEnvironment; otherwise the
    // "reset" below would just re-assign the same, already-modified grid.
    const freshState = () => initialEnvironment.map((row) => row.slice());

    agent = new RL.DQNAgent(env, spec);
    state = freshState();
    for (let i = 0; i < timesteps; i++) {
        const action = agent.act([].concat.apply([], state));
        moveBot(state, action);
        const reward = checkReward(state);
        console.log("Action: " + action + "; reward: " + reward.reward);
        agent.learn(reward.reward);
        if (reward.ended) {
            // NOTE(review): the tracked bot position is not reset here --
            // confirm whether moveBot needs it restored as well.
            state = freshState(); // reset
        }
    }
}