# Untitled

import numpy as np

'''
Verifying how Silver et al.'s Sarsa definition works on a very simple toy problem
'''

alpha = 0.1  # step size applied to delta * e
gamma = 1    # discount applied to the next state-action value
lamda = 0.9  # trace decay ("lambda" itself is a Python keyword)

RIGHT, LEFT, UP, DOWN = (0, 1, 2, 3)

# Eligibility traces and weights share one layout: one row per action, one
# column per feature, so q(s, a) = w[a].dot(f[s]).
e = np.zeros((4, 3), float)
w = np.array([
    [4, -3, 1],   # weights of right
    [0, 0, 0],    # weights of left
    [2, -2, 5],   # weights of up
    [-1, -1, -1]  # weights of down
])

print('w before:\n', w)

# One-hot state features: row i is the feature vector of state s_i.
f = np.array([
    [1, 0, 0],  # features of s0
    [0, 1, 0],  # features of s1
    [0, 0, 1],  # features of s2
])


def _sarsa_step(w, e, s, a, r, next_s, next_a):
    """Apply one Sarsa update for the transition (s, a, r, next_s, next_a).

    Reads the module-level ``alpha``, ``gamma``, ``lamda`` and ``f``.

    The weights are updated with the *incoming* trace and only afterwards is
    the trace decayed and bumped for (s, a).  This keeps the order used by the
    script being examined.
    NOTE(review): the usual backward-view Sarsa(lambda) refreshes the trace
    before applying it; with the order kept here, the pair just visited is not
    credited until the following step -- confirm which order is intended.

    Returns:
        (w, e, delta): new weight array, new trace array and the TD error.
        The arrays passed in are not modified.
    """
    delta = r + gamma * w[next_a].dot(f[next_s]) - w[a].dot(f[s])

    w = w + alpha * delta * e

    e = gamma * lamda * e
    # The gradient of q(s, a) = w[a].dot(f[s]) w.r.t. w is f[s] in row ``a``
    # and zero elsewhere, so the trace row is selected by the action.
    # (Indexing by the state only looked right while s == a.)
    e[a] = e[a] + f[s]
    return w, e, delta


# the stage is set. agent starts in s0, performs action right, receives r=+1,
# ends in s1, choosing action up
s = 0
a = RIGHT
next_s = 1
r = 1
next_a = UP

# delta should equal -5; w does not change here because every trace is still 0
w, e, delta = _sarsa_step(w, e, s, a, r, next_s, next_a)

print('w after:\n', w)  # that is, the value of action RIGHT in s0 has not changed!
print('e:\n', e)


# agent is in s1, performs up, reaches s2, gains r = -2 choosing action right
s = next_s
a = next_a

next_s = 2
r = -2
next_a = RIGHT

print('w before:\n', w)

# delta should be 1; the value of (RIGHT,s0) rises a little bit, while the
# value of (UP,s1) does not, because its trace is only set after the update
w, e, delta = _sarsa_step(w, e, s, a, r, next_s, next_a)

print('w after:\n', w)  # that is, the value of (UP,s1) has not changed!
print('e:\n', e)