Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#QLEARNING
#ISIR IPMC
#
# Tabular Q-learning on a small deterministic state machine.
#   trans(s,a) : state reached when action a is taken in state s
#   rew(s,a)   : immediate reward for taking action a in state s
#   Q(s,a)     : learned action-value estimate
# After training with uniformly random actions, a greedy path starting in
# state 1 is built from Q and handed to walkshow().

g  = 0.2;                 # discount factor applied to the next state's best value
nu = 0.4;                 # learning rate (step size of each Q update)
numberOfEpisodes = 100;
stepsPerEpisode  = 16;    # transitions taken in every training episode
rolloutSteps     = 16;    # greedy transitions taken for the final walk

# One row per state, one column per action.
trans = [  2,  4,  5, 13;
           1,  3,  6, 14;
           4,  2,  7, 15;
           3,  1,  8, 16;
           6,  8,  1,  9;
           5,  7,  2, 10;
           8,  6,  3, 11;
           7,  5,  4, 12;
          10, 12, 13,  5;
           9, 11, 14,  6;
          12, 10, 15,  7;
          11,  9, 16,  8;
          14, 16,  9,  1;
          13, 15, 10,  2;
          16, 14, 11,  3;
          15, 13, 12,  4 ];

# Same layout as trans: rew(s,a) is the reward for action a in state s.
rew = [  0, -1,  0, -1;
         0,  0,  0, -1;
         0,  0,  0, -1;
         0, -1,  0, -1;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  1,  0,  0;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  4,  0,  0;
         0, -1,  0, -1;
         0,  0,  0,  1;
         0,  0,  0,  1;
        -1,  0, -1,  0 ];

# Table dimensions come from the transition matrix so the tables cannot
# drift out of sync with hard-coded sizes.
[numStates, numActions] = size(trans);
Q = zeros(numStates, numActions);

# --- Training: random exploration ------------------------------------
for episode = 1:numberOfEpisodes
  s = randi(numStates);              # random initial state
  for step = 1:stepsPerEpisode
    # Explore uniformly over ALL actions. The previous ceil(rand*3) could
    # never pick action 4, so the last column of Q was never learned.
    a = randi(numActions);
    nextstate = trans(s, a);         # deterministic transition
    maxnext = max(Q(nextstate, :));  # value of the best follow-up action
    # Move Q(s,a) a fraction nu towards the one-step target.
    Q(s, a) = Q(s, a) + nu * (rew(s, a) + g * maxnext - Q(s, a));
    s = nextstate;
  end
end

# --- Greedy rollout ---------------------------------------------------
# Start in state 1 (fixed, not random) and always follow the action with
# the highest Q value; ties resolve to the lowest action index (max()).
states = zeros(1, rolloutSteps + 1);
states(1) = 1;
for r = 1:rolloutSteps
  s = states(r);
  [~, a] = max(Q(s, :));
  states(r + 1) = trans(s, a);
end

# walkshow is not defined in this file; presumably a course-provided
# visualiser for the state sequence - confirm it is on the load path.
walkshow(states)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement