Advertisement
Abhisek92

q_value.py

Dec 5th, 2023 (edited)
798
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.71 KB | None | 0 0
  1. import numpy as np
  2.  
  3. gamma = 0.5
  4.  
  5. T = np.array(
  6.     [[[(1/2), (1/2), 0, 0, 0],
  7.      [(1/4), (1/2), (1/4), 0, 0],
  8.      [0, (1/4), (1/2), (1/4), 0],
  9.      [0, 0, (1/4), (1/2), (1/4)],
  10.      [0, 0, 0, (1/2), (1/2)]],
  11.  
  12.     [[(1/2), (1/2), 0, 0, 0],
  13.      [(1/3), (2/3), 0, 0, 0],
  14.      [0, (1/3), (2/3), 0, 0],
  15.      [0, 0, (1/3), (2/3), 0],
  16.      [0, 0, 0, (1/3), (2/3)]],
  17.  
  18.     [[(2/3), (1/3), 0, 0, 0],
  19.      [0, (2/3), (1/3), 0, 0],
  20.      [0, 0, (2/3), (1/3), 0],
  21.      [0, 0, 0, (2/3), (1/3)],
  22.      [0, 0, 0, (1/2), (1/2)]]]
  23. )
  24.  
  25. R = np.zeros((5, 5))
  26. R[4, :] = 1
  27.  
  28. V = np.zeros(5)
  29.  
  30. for i in range(200):
  31.     V = np.max(np.sum(T * (R + gamma * V), axis=2), axis=0)
  32.  
  33. # Print final value function
  34. print(V)
  35.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement