import torch

# net, target_net, optimizer, criterion, GAMMA, e, and data are assumed to be
# defined elsewhere in the surrounding script (see the setup sketch below).

def make_step(model, target_model, optimizer, criterion,
              observation, action, reward, next_observation):
    inp_obv = torch.Tensor(observation)
    q = model(inp_obv)[action]  # Q(s, a) from the online network
    inp_next_obv = torch.Tensor(next_observation)
    # Double DQN: the online network picks the greedy next action...
    next_action = torch.argmax(model(inp_next_obv).detach())
    # ...and the target network evaluates that action
    q_a_next = target_model(inp_next_obv)[next_action]
    # LHS of the double DQN equation: Q(s, a)
    obv_reward = q
    # RHS: r + gamma * Q_target(s', argmax_a Q_online(s', a)); detach() keeps
    # gradients from flowing through the target network
    target_reward = reward + GAMMA * q_a_next.detach()
    # Backprop: zero stale gradients before backward(), then step
    optimizer.zero_grad()
    loss = criterion(obv_reward, target_reward)  # MSELoss
    loss.backward()
    optimizer.step()  # RMSprop on net

# Inside the episode loop: copy the online weights into the target
# network every other episode (e is the episode index)
if e % 2 == 0:
    target_net.load_state_dict(net.state_dict())
# GAMMA is a constant discount factor; it is not decayed per transition
for observation, action, reward, next_observation in data:
    make_step(net, target_net, optimizer, criterion,
              observation, action, reward, next_observation)
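For context, the fragment above assumes a surrounding setup that the paste does not include. The sketch below shows one plausible way the missing pieces (net, target_net, optimizer, criterion, GAMMA, data, and the episode counter e) could be defined; the network sizes, learning rate, episode count, and the dummy transition are illustrative assumptions, not part of the original.

import torch
import torch.nn as nn

# Hypothetical setup; every name and number below is an assumption,
# since the paste does not show how these objects were created.
OBS_DIM, N_ACTIONS = 4, 2  # assumed CartPole-like dimensions
GAMMA = 0.99               # assumed constant discount factor

def build_net():
    return nn.Sequential(nn.Linear(OBS_DIM, 64), nn.ReLU(),
                         nn.Linear(64, N_ACTIONS))

net, target_net = build_net(), build_net()
target_net.load_state_dict(net.state_dict())

optimizer = torch.optim.RMSprop(net.parameters(), lr=1e-3)  # "RMSprop on net"
criterion = nn.MSELoss()                                    # "MSELoss"

# One dummy (s, a, r, s') transition to exercise make_step;
# a real agent would collect these from its environment.
data = [([0.1, 0.0, -0.2, 0.3], 1, 1.0, [0.2, 0.1, -0.1, 0.2])]

for e in range(10):  # episode loop assumed around the original fragment
    if e % 2 == 0:
        target_net.load_state_dict(net.state_dict())
    for observation, action, reward, next_observation in data:
        make_step(net, target_net, optimizer, criterion,
                  observation, action, reward, next_observation)

Syncing the target network only on even episodes matches the e % 2 == 0 check in the fragment; how often to sync, and whether to use a replay buffer instead of iterating data in order, are design choices the paste leaves open.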