chaser.py
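# Chaser: a small OpenAI Gym environment in which an agent steers left or right
# on the unit square to catch a "rabbit" that drifts forward and turns at
# random. The observation is the (x, y, bearing) of both creatures; the agent
# pays a small per-step penalty and earns a large bonus for closing to within
# CATCH_RADIUS of the rabbit.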
from gym import Env
from gym.spaces import Box, Discrete
import numpy as np
import math


class Creature(object):
    """A point on the unit square with a heading, able to move and turn."""

    MIN_X = 0.0
    MAX_X = 1.0

    MIN_Y = 0.0
    MAX_Y = 1.0

    MIN_BEARING = 0.0
    MAX_BEARING = 2 * math.pi

    def __init__(self, x, y, bearing):
        self.x = x
        self.y = y
        self.bearing = bearing

    def move(self, velocity):
        # Advance one step along the current heading.
        self.x += np.cos(self.bearing) * velocity
        self.y += np.sin(self.bearing) * velocity

    def turn(self, radians):
        # Wrap into [0, 2*pi) so bearings stay inside the declared Box bounds.
        self.bearing = (self.bearing + radians) % Creature.MAX_BEARING

    def distance_to(self, other_creature):
        # Euclidean distance to another creature.
        dx = other_creature.x - self.x
        dy = other_creature.y - self.y
        return np.sqrt(dx * dx + dy * dy)

    def unwrap(self):
        # Flatten into the observation layout: [x, y, bearing].
        return np.array([self.x, self.y, self.bearing])

    @staticmethod
    def new_random():
        # Spawn a creature at a uniformly random pose.
        return Creature(np.random.uniform(Creature.MIN_X, Creature.MAX_X),
                        np.random.uniform(Creature.MIN_Y, Creature.MAX_Y),
                        np.random.uniform(Creature.MIN_BEARING, Creature.MAX_BEARING))

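# Quick sanity check of the movement math (illustrative, not in the original
# paste): a creature at the origin with bearing 0 moves along +x only.
#
#     c = Creature(0.0, 0.0, 0.0)
#     c.move(0.1)
#     assert (round(c.x, 3), round(c.y, 3)) == (0.1, 0.0)
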
class Chaser(Env):
    """Gym environment: steer the agent to catch a randomly wandering rabbit."""

    ACTION_LEFT = 0
    ACTION_RIGHT = 1
    ACTION_NONE = 2
    DELTA_BEARING = 0.12        # radians turned per left/right action
    AGENT_VELOCITY = 0.009      # agent step size, units per tick
    RABBIT_VELOCITY = 0.003     # rabbit step size: a third of the agent's
    RABBIT_BEARING_DELTA = 0.3  # magnitude of the rabbit's random turn
    CATCH_RADIUS = 0.1          # distance at which the rabbit counts as caught
    MAX_STEPS = 260             # episode length cap
    REWARD_NO_CATCH = -0.1      # per-step penalty while the rabbit is loose
    CATCH_REWARD = 30.0         # bonus for a catch

    def __init__(self):
        # Three discrete actions: turn left, turn right, or keep heading.
        self.action_space = Discrete(3)
        # Observation: [agent x, y, bearing, rabbit x, y, bearing].
        self.observation_space = Box(
            low=np.array([
                Creature.MIN_X,
                Creature.MIN_Y,
                Creature.MIN_BEARING,
                Creature.MIN_X,
                Creature.MIN_Y,
                Creature.MIN_BEARING]
            ),
            high=np.array([
                Creature.MAX_X,
                Creature.MAX_Y,
                Creature.MAX_BEARING,
                Creature.MAX_X,
                Creature.MAX_Y,
                Creature.MAX_BEARING]
            )
        )
        # Episode-return bounds: all-penalty worst case vs. an immediate catch.
        self.reward_range = (Chaser.MAX_STEPS * Chaser.REWARD_NO_CATCH, Chaser.CATCH_REWARD)
        self.state_size = np.prod(self.observation_space.shape)
        self.steps = 0  # reset() must still be called before step()
        self.viewer = None

    def step(self, action):
        assert self.action_space.contains(action)
        self.steps += 1

        def get_agent_bearing_delta():
            if action == Chaser.ACTION_LEFT:
                return -Chaser.DELTA_BEARING
            elif action == Chaser.ACTION_RIGHT:
                return Chaser.DELTA_BEARING
            return 0.0

        # Move along the current heading first, then apply the chosen turn.
        self.agent.move(Chaser.AGENT_VELOCITY)
        self.agent.turn(get_agent_bearing_delta())
        # Keep the agent on screen, mirroring the clipping applied to the rabbit,
        # so observations stay inside the declared Box.
        self.agent.x = np.clip(self.agent.x, Creature.MIN_X, Creature.MAX_X)
        self.agent.y = np.clip(self.agent.y, Creature.MIN_Y, Creature.MAX_Y)

        self.rabbit.move(Chaser.RABBIT_VELOCITY)
        # Make sure the rabbit does not escape the screen.
        self.rabbit.x = np.clip(self.rabbit.x, Creature.MIN_X, Creature.MAX_X)
        self.rabbit.y = np.clip(self.rabbit.y, Creature.MIN_Y, Creature.MAX_Y)
        # The rabbit randomly veers left, veers right, or keeps its heading.
        self.rabbit.turn(np.random.choice([-Chaser.RABBIT_BEARING_DELTA, 0.0, Chaser.RABBIT_BEARING_DELTA]))

        reached_goal = self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS
        reward = Chaser.CATCH_REWARD if reached_goal else Chaser.REWARD_NO_CATCH
        done = self.steps >= Chaser.MAX_STEPS or reached_goal
        return self.unwrap_state(), reward, done, {}

    def reset(self):
        self.steps = 0
        # Resample both poses until they start at least four catch-radii apart,
        # so an episode cannot begin already solved.
        self.agent = Creature.new_random()
        self.rabbit = Creature.new_random()
        while self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS * 4.0:
            self.agent = Creature.new_random()
            self.rabbit = Creature.new_random()
        return self.unwrap_state()

    def unwrap_state(self):
        # Concatenate both creatures into one flat 6-element observation.
        return np.concatenate((self.agent.unwrap(), self.rabbit.unwrap()))

    def render(self, mode='human'):
        screen_width = 400
        screen_height = 400

        # World coordinates span the unit square; scale them to pixels.
        world_width = Creature.MAX_X
        scale = screen_width / world_width

        if self.viewer is None:
            # Build the viewer and geometry lazily on the first render call.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            agent = rendering.make_circle(radius=5, res=5, filled=True)
            self.agent_translation = rendering.Transform()
            agent.add_attr(self.agent_translation)

            rabbit = rendering.make_circle(radius=3, res=5, filled=True)
            self.rabbit_translation = rendering.Transform()
            rabbit.add_attr(self.rabbit_translation)

            self.viewer.add_geom(agent)
            self.viewer.add_geom(rabbit)

        self.agent_translation.set_translation(self.agent.x * scale, self.agent.y * scale)
        self.agent_translation.set_rotation(self.agent.bearing)

        self.rabbit_translation.set_translation(self.rabbit.x * scale, self.rabbit.y * scale)
        self.rabbit_translation.set_rotation(self.rabbit.bearing)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        # Release the rendering window if one was ever opened.
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def seed(self, seed=None):
        # All randomness above goes through np.random, so seeding the global
        # NumPy generator is enough to make rollouts reproducible.
        np.random.seed(seed)
        return [seed]


if __name__ == '__main__':
    # Smoke test: run random-action episodes and print each episode's return.
    env = Chaser()
    episodes = 10000
    for i in range(episodes):
        done = False
        state = env.reset()
        episode_reward = 0
        while not done:
            env.render()
            next_state, reward, done, _ = env.step(env.action_space.sample())
            episode_reward += reward
        print('Episode reward %s' % episode_reward)
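
# Reproducibility sketch (illustrative; relies on the seed() implementation
# above, which delegates to np.random.seed): seeding twice with the same value
# should yield identical starting poses.
#
#     env = Chaser()
#     env.seed(42); s1 = env.reset()
#     env.seed(42); s2 = env.reset()
#     assert np.allclose(s1, s2)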