Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //Initialise S: the episode begins at the start state, which is also recorded as the first entry of the path
- node.state = start;
- myPath.add(node.state);
- //Choose A from S with epsilon-greedy (epsilon = 0.1); the choice is stored in statePrime
- //Note: an action is identified by the state it leads to, so the chosen action and statePrime are the same value
- statePrime = myActions.epsilonGreedy(node.state, 0.1f, qMatrix);
- while(node.state != goal){ //NOTE(review): if both operands are boxed Integer this compares references, not values - confirm the declared types
- actionPrime = myActions.epsilonGreedy(statePrime, 0.1f, qMatrix); //Choose A' from S'
- //- - - - - - - - - - Cliff - - - - - - - - - - - - -
- if(rMatrix[node.state][statePrime] == -100){
- //Walked into the cliff! (a reward of -100 for this transition is what marks a cliff step)
- //Update Q(S,A) = alpha*(R + Q(S',A')) + (1-alpha)*Q(S,A); no discount factor is applied to Q(S',A')
- myQ = qMatrix[statePrime][actionPrime]; //NOTE(review): bootstraps from the cliff cell's Q-value, not the start state's - confirm this is intended
- qMatrix[node.state][statePrime] = alpha*(rMatrix[node.state][statePrime] + myQ)+ (1-alpha)*qMatrix[node.state][statePrime];
- //Agent is returned to the start and a fresh action is selected from there; actionPrime chosen above is discarded
- node.state = start;
- statePrime = myActions.epsilonGreedy(node.state, 0.1f, qMatrix);
- myPath.add(node.state);
- }
- else{
- //No cliff, continue: record the reward, update Q, then advance one step
- myReward.add(rMatrix[node.state][statePrime]); //NOTE(review): only this branch records a reward; the -100 cliff reward is never added - confirm this is intended
- //Update Q(S,A) = alpha*(R + Q(S',A')) + (1-alpha)*Q(S,A); no discount factor is applied to Q(S',A')
- myQ = qMatrix[statePrime][actionPrime];
- qMatrix[node.state][statePrime] = alpha*(rMatrix[node.state][statePrime] + myQ)+ (1-alpha)*qMatrix[node.state][statePrime];
- node.state = statePrime; //S <- S'
- statePrime = actionPrime; //A <- A'
- myPath.add(node.state);
- }
- public Integer epsilonGreedy(Integer state, Float epsilon, Float[][] qMatrix){
- Random random = new Random();
- Float value = random.nextFloat();
- Integer statePrime = 0;
- if(value < epsilon){
- myNode.state = state;
- statePrime = random(myNode.neighbors());
- }
- else{
- statePrime = greedy(state, qMatrix);
- }
- return statePrime;
- }
- (state,action)==(state,statePrime)
- (state', action')==(statePrime,actionPrime)
- 000000000000
- 111111111111
- 100000000001
- 100000000001
- 111111111111
- 100000000001
- 100000000001
- 100000000001
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement