# Untitled

#QLEARNING

#ISIR IPMC
# trans(s, a): state reached when action a is taken in state s
# (16 states as rows, 4 actions as columns).
trans = [  2,  4,  5, 13;
           1,  3,  6, 14;
           4,  2,  7, 15;
           3,  1,  8, 16;
           6,  8,  1,  9;
           5,  7,  2, 10;
           8,  6,  3, 11;
           7,  5,  4, 12;
          10, 12, 13,  5;
           9, 11, 14,  6;
          12, 10, 15,  7;
          11,  9, 16,  8;
          14, 16,  9,  1;
          13, 15, 10,  2;
          16, 14, 11,  3;
          15, 13, 12,  4 ];

# rew(s, a): immediate reward for taking action a in state s
# (same layout as trans).
rew = [  0, -1,  0, -1;
         0,  0,  0, -1;
         0,  0,  0, -1;
         0, -1,  0, -1;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  1,  0,  0;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  4,  0,  0;
         0, -1,  0, -1;
         0,  0,  0,  1;
         0,  0,  0,  1;
        -1,  0, -1,  0 ];

# Action-value table, one entry per (state, action) pair, initialised to zero.
Q = zeros(size(trans));

# Coefficients of the Q update:
#   Q(s,a) <- Q(s,a) + nu * (rew(s,a) + g * max(Q(s',:)) - Q(s,a))
g  = 0.2;   # weight of the best next-state value (discount factor)
nu = 0.4;   # step size (learning rate)

numberOfEpisodes = 100;   # number of training episodes to run


# Train Q with tabular Q-learning while following a uniformly random policy.
# Reads trans, rew, g, nu and numberOfEpisodes; updates Q in place.
numStates       = size(Q, 1);
numActions      = size(Q, 2);
stepsPerEpisode = 16;

for episode = 1:numberOfEpisodes

  s = randi(numStates);   # random initial state for this episode

  # Distinct loop variable: the inner loop used to reuse the outer one.
  for step = 1:stepsPerEpisode

    # Explore uniformly over ALL actions. Drawing from only 3 of the 4
    # columns would leave the last column of Q permanently at zero.
    a = randi(numActions);
    nextstate = trans(s, a);   # go to the next state

    # Value of the best action available from the next state.
    maxnext = max(Q(nextstate, :));

    # Q-learning update: move Q(s,a) towards reward + g * best next value.
    Q(s, a) = Q(s, a) + nu * (rew(s, a) + g * maxnext - Q(s, a));

    s = nextstate;

  end

end


# Follow the greedy policy (highest-valued action in each state) for 16
# steps, starting from the fixed state 1, and record every visited state.
numSteps  = 16;
states    = zeros(1, numSteps + 1);
states(1) = 1;   # start state (fixed, not random)

for r = 1:numSteps

  s = states(r);

  # max returns the first index on ties, so ties go to the lowest action.
  [bestValue, a] = max(Q(s, :));

  states(r + 1) = trans(s, a);

end

# walkshow is defined outside this script; presumably it visualises the
# visited state sequence -- confirm against its definition.
walkshow(states)