Advertisement
Guest User

d2t2.py

a guest
Apr 5th, 2020
204
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.63 KB | None | 0 0
  1. import numpy as np
  2.  
  3. prob_w = [0.1, 0.2, 0.7]
  4. states = [0, 1, 2]
  5. actions = [0, 1, 2]
  6. demand = [0, 1, 2]
  7.  
  8.  
  9. def fill_level(x, u, w):
  10.     fill_lvl = x + u - w
  11.     return fill_lvl
  12.  
  13.  
  14. def next_state(x, u, w, clip=False):
  15.     x_next = max(0, fill_level(x, u, w))
  16.     if clip:
  17.         x_next = min(x_next, 2)
  18.     return x_next
  19.  
  20.  
  21. def reward(x, u, w):
  22.     rew = -(fill_level(x, u, w) ** 2)
  23.     return rew
  24.  
  25.  
  26. def one_step(v_old):
  27.     vals, acts = [], []
  28.     for x in states:
  29.         valsx = []
  30.         for u in actions:
  31.             ret = []
  32.             for w in demand:
  33.                 xnext = next_state(x, u, w, clip=True)
  34.                 tmp = reward(x, u, w)
  35.                 tmp += v_old[xnext]
  36.                 tmp *= prob_w[w]
  37.                 ret.append(tmp)
  38.             value = np.sum(ret)
  39.             valsx.append(value)
  40.         v_curr = np.max(valsx)
  41.         vals.append(v_curr)
  42.         u_optx = np.argmax(valsx)
  43.         acts.append(u_optx)
  44.     return vals, acts
  45.  
  46.  
  47. def run_DP(T=20, V_init=[-0, -1, -2]):
  48.     values, policy = np.zeros((T+1, len(V_init))), np.zeros((T+1, len(actions)))
  49.  
  50.     v_old = V_init
  51.     values[T, ] = V_init
  52.     for t in range(T, 0, -1):
  53.         vals, acts = one_step(v_old)
  54.         values[t-1, ], policy[t-1, ] = vals, acts
  55.         v_old = vals
  56.     return values, policy
  57.  
  58.  
  59. def main():
  60.     V_T = [-1, -2, -3]
  61.     T = 20
  62.     values, policy = run_DP(T=T, V_init=V_T)
  63.     for t in range(T, 0, -1):
  64.         print('Timestep ', t, ' Value for states 0, 1, 2: ', values[t, :],
  65.               ' Optimal actions for states 0, 1, 2: ', policy[t, :])
  66.  
  67.  
  68. if __name__ == '__main__':
  69.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement