import math

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

env = gym.make('MountainCarContinuous-v0')
env.seed(101)
np.random.seed(101)

print('observation space:', env.observation_space)
print('action space:', env.action_space)
print(' - low:', env.action_space.low)
print(' - high:', env.action_space.high)

class Agent(nn.Module):
    def __init__(self, env, h_size=16):
        super(Agent, self).__init__()
        self.env = env
        # state, hidden layer, action sizes
        self.s_size = env.observation_space.shape[0]
        self.h_size = h_size
        self.a_size = env.action_space.shape[0]
        # define layers
        self.fc1 = nn.Linear(self.s_size, self.h_size)
        self.fc2 = nn.Linear(self.h_size, self.a_size)

    def set_weights(self, weights):
        s_size = self.s_size
        h_size = self.h_size
        a_size = self.a_size
        # split the flat weight vector into per-layer weights and biases
        fc1_end = (s_size * h_size) + h_size
        fc1_W = torch.from_numpy(weights[:s_size * h_size].reshape(s_size, h_size))
        fc1_b = torch.from_numpy(weights[s_size * h_size:fc1_end])
        fc2_W = torch.from_numpy(weights[fc1_end:fc1_end + (h_size * a_size)].reshape(h_size, a_size))
        fc2_b = torch.from_numpy(weights[fc1_end + (h_size * a_size):])
        # copy the values into each layer's parameters
        self.fc1.weight.data.copy_(fc1_W.view_as(self.fc1.weight.data))
        self.fc1.bias.data.copy_(fc1_b.view_as(self.fc1.bias.data))
        self.fc2.weight.data.copy_(fc2_W.view_as(self.fc2.weight.data))
        self.fc2.bias.data.copy_(fc2_b.view_as(self.fc2.bias.data))

    def get_weights_dim(self):
        # total parameter count: (inputs + bias) per layer
        return (self.s_size + 1) * self.h_size + (self.h_size + 1) * self.a_size

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = torch.tanh(self.fc2(x))  # torch.tanh: F.tanh is deprecated
        return x.cpu().data

    def evaluate(self, weights, gamma=1.0, max_t=5000):
        # run one episode with the given flat weight vector and return the discounted return
        self.set_weights(weights)
        episode_return = 0.0
        state = self.env.reset()
        for t in range(max_t):
            state = torch.from_numpy(state).float().to(device)
            action = self.forward(state)
            state, reward, done, _ = self.env.step(action)
            episode_return += reward * math.pow(gamma, t)
            if done:
                break
        return episode_return

agent = Agent(env).to(device)
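
The paste stops right after the agent is constructed. The evaluate(weights, ...) and get_weights_dim() methods form the black-box interface that a cross-entropy-method (CEM) style search would call. A minimal sketch of such a loop follows; the cem function, its hyperparameters, and the sampling scheme are assumptions added here for illustration, not part of the original paste.

# Hypothetical CEM loop (assumption, not in the original paste):
# sample weight vectors around the current best, keep the elite fraction,
# and re-center on the mean of the elites.
def cem(n_iterations=500, pop_size=50, elite_frac=0.2, sigma=0.5, gamma=1.0, max_t=1000):
    n_elite = int(pop_size * elite_frac)
    best_weight = sigma * np.random.randn(agent.get_weights_dim())
    for i_iteration in range(1, n_iterations + 1):
        # sample a population of candidate weight vectors
        weights_pop = [best_weight + sigma * np.random.randn(agent.get_weights_dim())
                       for _ in range(pop_size)]
        rewards = np.array([agent.evaluate(w, gamma, max_t) for w in weights_pop])
        # keep the top-scoring candidates and average them
        elite_idxs = rewards.argsort()[-n_elite:]
        elite_weights = [weights_pop[i] for i in elite_idxs]
        best_weight = np.array(elite_weights).mean(axis=0)
        print('Iteration {}\tBest reward {:.2f}'.format(i_iteration, rewards.max()))
    return best_weight

best_weight = cem()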