Untitled

a guest
Aug 15th, 2018
import torch


def make_step(model, target_model, optimizer, criterion,
              observation, action, reward, next_observation):
    optimizer.zero_grad()  # clear gradients left over from the previous step

    # Q-value predicted by the online network for the action actually taken
    inp_obv = torch.Tensor(observation)
    q = model(inp_obv)
    q = q[action]

    # Double DQN: the online network picks the best next action,
    # the target network evaluates it
    inp_next_obv = torch.Tensor(next_observation)
    q_argmax = torch.argmax(model(inp_next_obv).detach())
    q_next = target_model(inp_next_obv)
    q_a_next = q_next[q_argmax]

    # LHS of the double DQN equation: Q(s, a)
    obv_reward = q

    # RHS of the double DQN equation: r + GAMMA * Q_target(s', argmax_a Q_online(s', a))
    target_reward = torch.Tensor([reward]) + GAMMA * q_a_next.detach()

    # Backprop and parameter update
    loss = criterion(obv_reward, target_reward)  # MSELoss
    loss.backward()
    optimizer.step()  # RMSprop update of the online network


# Periodically copy the online weights into the target network
if e % 2 == 0:
    target_net.load_state_dict(net.state_dict())

for i in range(len(data)):
    observation, action, reward, next_observation = data[i]
    make_step(net, target_net, optimizer, criterion,
              observation, action, reward, next_observation)

GAMMA *= GAMMA  # square the discount factor after each pass over the data
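For reference, the quantity being regressed onto is the double DQN target r + GAMMA * Q_target(s', argmax_a Q_online(s', a)). The snippet leaves GAMMA, the two networks, the optimizer, the loss, the epoch counter e, and the transition list data undefined; below is a minimal sketch of that surrounding setup. The network sizes, learning rate, and GAMMA value are illustrative assumptions, not values from the original paste, and e is assumed to be an episode counter from an enclosing loop that is not shown.

import torch.nn as nn
import torch.optim as optim

# Online and target networks with identical architectures
# (assumed 4-dimensional observations and 2 discrete actions)
net = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 2))
target_net = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 2))
target_net.load_state_dict(net.state_dict())

optimizer = optim.RMSprop(net.parameters(), lr=1e-3)
criterion = nn.MSELoss()
GAMMA = 0.99  # discount factor

# data is a list of (observation, action, reward, next_observation) transitions
# collected from the environment before this training pass.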