Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Starting layout of the grid world: 5 rows of 7 cells, indexed as
 * `grid[x][y]` (x selects the row array, y the cell inside it).
 *
 * Cell codes as used by the rest of this script:
 *   0 - empty cell
 *   1 - movable block (validMove/move try to shift it one cell further)
 *   2 - the bot (sits at `botPos` in this layout)
 */
const initialEnvironment = [
  [0, 0, 0, 0, 0, 0, 2],
  [0, 0, 0, 1, 0, 0, 1],
  [0, 1, 0, 0, 1, 0, 0],
  [0, 0, 1, 0, 0, 1, 0],
  [0, 0, 0, 0, 0, 0, 0],
];

/**
 * Coordinates of the bot inside the grid, in the same `grid[x][y]` order.
 * Declared explicitly (it used to be an implicit global, which throws in
 * strict mode / ES modules). `let` rather than `const` so code that replaces
 * the position object wholesale keeps working.
 */
let botPos = { x: 0, y: 6 };
/**
 * Report where the bot is.
 *
 * Takes no parameters; any arguments a caller supplies are ignored.
 *
 * @returns {{x: number, y: number}} the shared `botPos` object itself,
 *   not a copy.
 */
function botPosition() {
  const current = botPos;
  return current;
}
/**
 * Step offsets for the four actions, keyed by action index 0-3.
 *
 * Each entry is added to a position's `x`/`y` to get the neighbouring cell;
 * `x` is the first grid index and `y` the second (`grid[x][y]`).
 * Keys can be looked up with either a number or its string form.
 *
 * Declared with `const` (it used to be an implicit global, which throws in
 * strict mode / ES modules).
 */
const directions = {
  0: { x: 0, y: 1 },
  1: { x: 0, y: -1 },
  2: { x: 1, y: 0 },
  3: { x: -1, y: 0 },
};
/**
 * Decide whether whatever stands on `position` can step one cell in
 * `direction`.
 *
 * The step is allowed when the target cell lies inside the grid and is either
 * not a movable block, or is a movable block (value 1) that can itself move
 * one cell further in the same direction (checked recursively, so a row of
 * blocks is only movable if the cell behind the last one is free).
 *
 * @param {number[][]} environment grid indexed as environment[x][y]
 * @param {{x: number, y: number}} position cell the move starts from
 * @param {number|string} direction key into `directions`
 * @returns {boolean} true when the move is possible; false when it leaves the
 *   grid, is blocked, or `direction` is not a known key
 */
function validMove(environment, position, direction) {
  const delta = directions[direction];
  if (delta === undefined) {
    return false;
  }

  const x = position.x + delta.x;
  const y = position.y + delta.y;
  const row = environment[x];

  // Bounds check by existence, not truthiness: an empty cell holds 0, which
  // is falsy, and must still count as a reachable in-grid cell.
  if (row === undefined || row[y] === undefined) {
    return false;
  }

  if (row[y] === 1) {
    // A block is in the way: the move works only if the block can be pushed.
    return validMove(environment, { x: x, y: y }, direction);
  }
  return true;
}
// Test suite: in the initial environment, only S/W are legal moves for the bot.
// Direction keys: 0=North, 1=South, 2=West, 3=East
//
// From the bot's start cell the `directions` offsets give:
//   0 -> y+1 leaves the grid                       (illegal)
//   1 -> y-1 is an empty cell                      (legal)
//   2 -> x+1 is a block with an empty cell behind  (legal)
//   3 -> x-1 leaves the grid                       (illegal)
//
// console.assert reports a mismatch without throwing, so the rest of the
// script still runs when a check fails.
{
  const expectations = { 0: false, 1: true, 2: true, 3: false };
  for (const direction of Object.keys(expectations)) {
    const actual = validMove(initialEnvironment, botPosition(initialEnvironment), direction);
    console.assert(
      actual === expectations[direction],
      `validMove direction ${direction}: expected ${expectations[direction]}, got ${actual}`
    );
  }
}
/**
 * Move the occupant of `position` one cell in `direction`, pushing whatever
 * occupies the target cell ahead of it. Mutates `environment` in place.
 *
 * Nothing is changed when `validMove` rejects the step.
 *
 * @param {number[][]} environment grid indexed as environment[x][y]
 * @param {{x: number, y: number}} position cell whose occupant should move
 * @param {number|string} direction key into `directions`
 * @returns {boolean} false when the move was rejected, true when it was
 *   carried out
 */
function move(environment, position, direction) {
  if (!validMove(environment, position, direction)) {
    return false;
  }

  const delta = directions[direction];
  const newX = position.x + delta.x;
  const newY = position.y + delta.y;

  if (environment[newX][newY] !== 0) {
    // Clear the target cell first by shifting its occupant one cell further.
    move(environment, { x: newX, y: newY }, direction);
  }

  environment[newX][newY] = environment[position.x][position.y];
  environment[position.x][position.y] = 0;
  return true;
}
/**
 * Draw a random bit.
 *
 * @param {number} [p=0.99] probability of returning 1
 * @returns {0|1} 1 when the Math.random() draw is below `p`, otherwise 0
 */
function getRandBinary(p = 0.99) {
  // Local binding: the draw used to leak into an implicit global `rand`.
  const rand = Math.random();
  return rand >= p ? 0 : 1;
}
/**
 * Score the current grid.
 *
 * A reward is only considered when cell [4][6] holds a movable block (1).
 * In that case the reward is a random bit from getRandBinary(), and `ended`
 * reports whether cells [5][1] .. [5][6] are all 0.
 *
 * The cells are read with real two-level indexing. `environment[4,6]` is the
 * comma operator and reads `environment[6]`; a chained
 * `a == b == c == 0` compares booleans, not cells.
 *
 * NOTE(review): row 5 lies outside the 5-row initial grid, so `ended` is
 * always false there. Confirm which row the end-of-episode check is meant
 * to inspect.
 *
 * @param {number[][]} environment grid indexed as environment[x][y]
 * @returns {{reward: number, ended: boolean}}
 */
function checkReward(environment) {
  const goalRow = environment[4];
  if (goalRow === undefined || goalRow[6] !== 1) {
    return { reward: 0, ended: false };
  }

  const reward = getRandBinary();
  const endRow = environment[5];
  const ended = endRow !== undefined &&
    [1, 2, 3, 4, 5, 6].every(function (col) { return endRow[col] === 0; });

  return { reward: reward, ended: ended };
}
// Load Reinforce.js, then run the training loop once the library is ready.
const script = document.createElement("script");
// raw.githubusercontent.com answers with `text/plain` plus
// `X-Content-Type-Options: nosniff`, so browsers refuse to execute it as a
// script. jsDelivr serves the same repository file with a JavaScript MIME type.
script.src = "https://cdn.jsdelivr.net/gh/karpathy/reinforcejs@master/lib/rl.js";

// Environment description handed to the agent.
const env = {};
env.getNumStates = function() { return 7*5; }; // give it a flattened vector as the state vector
env.getMaxNumActions = function() { return 4; };
const spec = { alpha: 0.01 };
const timesteps = 10;

// Assigned once the library has loaded; kept at top level so they can still
// be inspected from the console.
let agent;
let state;

// The injected script loads asynchronously, so `RL` only exists inside the
// load handler; using it right after appendChild throws a ReferenceError.
script.addEventListener("load", function () {
  const copyGrid = function (grid) {
    return grid.map(function (row) { return row.slice(); });
  };
  // Remember the bot's starting cell so a reset can restore it.
  const startPos = { x: botPos.x, y: botPos.y };

  agent = new RL.DQNAgent(env, spec);
  // Work on a copy: the grid is mutated in place, and resetting to the very
  // array that was just mutated would not reset anything.
  state = copyGrid(initialEnvironment);

  for (let i = 0; i < timesteps; i++) {
    const action = agent.act([].concat.apply([], state));
    // NOTE(review): moveBot is not defined in the visible source; presumably
    // it applies move() at the bot's position and updates botPos. Confirm.
    moveBot(state, action);
    const outcome = checkReward(state);
    console.log("Action: " + action + "; reward: " + outcome.reward);
    agent.learn(outcome.reward);
    if (outcome.ended) { // reset
      state = copyGrid(initialEnvironment);
      // Presumably the bot has to return to its start cell too. Harmless if
      // botPos was never changed.
      botPos.x = startPos.x;
      botPos.y = startPos.y;
    }
  }
});
script.addEventListener("error", function () {
  console.error("Could not load Reinforce.js from " + script.src);
});

document.body.appendChild(script);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement