% Egreedy

% Q-learning
% with an
% epsilon-greedy policy

% ---------------------------------------------------------------------
% Problem definition and hyper-parameters for the Q-learning run below.
% ---------------------------------------------------------------------

% Hyper-parameters.
g                = 0.2;   % discount factor: weights the best next-state value
nu               = 0.4;   % learning rate: step size of each Q update
numberOfEpisodes = 100;   % number of training episodes
epsilon          = 0.99;  % initial probability of taking a random action

% Transition table: trans(s, a) is the state reached from state s by
% taking action a. One row per state (16 states), one column per action
% (4 actions).
trans = [  2,  4,  5, 13;
           1,  3,  6, 14;
           4,  2,  7, 15;
           3,  1,  8, 16;
           6,  8,  1,  9;
           5,  7,  2, 10;
           8,  6,  3, 11;
           7,  5,  4, 12;
          10, 12, 13,  5;
           9, 11, 14,  6;
          12, 10, 15,  7;
          11,  9, 16,  8;
          14, 16,  9,  1;
          13, 15, 10,  2;
          16, 14, 11,  3;
          15, 13, 12,  4 ];

% Reward table: rew(s, a) is the immediate reward for taking action a
% in state s. Same layout as trans.
rew = [  0, -1,  0, -1;
         0,  0,  0, -1;
         0,  0,  0, -1;
         0, -1,  0, -1;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  1,  0,  0;
        -1, -1,  0,  0;
         0,  0,  0,  0;
         0,  0,  0,  0;
         0,  4,  0,  0;
         0, -1,  0, -1;
         0,  0,  0,  1;
         0,  0,  0,  1;
        -1,  0, -1,  0 ];

% Action-value table, one entry per (state, action) pair, initialised
% to zero. Sized from the transition table (16 x 4).
Q = zeros(size(trans));


% Tabular Q-learning with an epsilon-greedy behaviour policy.
%
% Reads:   trans, rew, g, nu, numberOfEpisodes
% Updates: Q (learned in place), epsilon (decayed once per episode)
%
% Each episode starts in a uniformly random state and runs for a fixed
% number of steps. At every step the agent explores (uniformly random
% action) with probability epsilon, otherwise it exploits the current
% greedy action, and then applies the one-step Q-learning update.
numStates       = size(Q, 1);
numActions      = size(Q, 2);
stepsPerEpisode = 16;

for episode = 1:numberOfEpisodes

  s = randi(numStates);  % initial state

  for step = 1:stepsPerEpisode

    if rand < epsilon
      % EXPLORE: draw from ALL actions. The previous ceil(rand*3) could
      % never select the last action column.
      a = randi(numActions);
    else
      % EXPLOIT: greedy action (max returns the first index on ties)
      [dummy, a] = max(Q(s, :));
    end

    nextstate = trans(s, a);         % go to the next state
    maxnext   = max(Q(nextstate, :)); % best value reachable from there

    % Q-learning update: move Q(s,a) towards the bootstrapped target.
    Q(s, a) = Q(s, a) + nu * (rew(s, a) + g * maxnext - Q(s, a));
    s = nextstate;

  end

  % Decay exploration after every episode. The trailing semicolon keeps
  % the value from being printed on each of the episodes.
  epsilon = epsilon * 0.9;

end


% Greedy rollout of the learned policy.
%
% Starts in state 1 (a fixed start state, not a random one) and follows
% the action with the highest Q value for 16 steps, recording every
% visited state. The resulting 1x17 path is passed to walkshow, which is
% defined outside this script.
numSteps  = 16;
states    = zeros(1, numSteps + 1);
states(1) = 1;

for k = 1:numSteps
  here = states(k);
  [dummy, greedyAction] = max(Q(here, :));  % first index wins on ties
  states(k + 1) = trans(here, greedyAction);
end

walkshow(states)