chaser.py

from gym import Env
from gym.spaces import Discrete
from gym.spaces import Box
import numpy as np
import math


class Creature(object):
    """A simple creature with a 2-D position and a heading (bearing) in radians."""

    MIN_X = 0.0
    MAX_X = 1.0

    MIN_Y = 0.0
    MAX_Y = 1.0

    MIN_BEARING = 0.0
    MAX_BEARING = 2 * math.pi

    def __init__(self, x, y, bearing):
        self.x = x
        self.y = y
        self.bearing = bearing

    def move(self, velocity):
        # Advance one step along the current bearing.
        self.x += np.cos(self.bearing) * velocity
        self.y += np.sin(self.bearing) * velocity

    def turn(self, radians):
        # Wrap the bearing into [MIN_BEARING, MAX_BEARING) so observations
        # stay inside the bounds declared in Chaser's observation space.
        self.bearing = (self.bearing + radians) % Creature.MAX_BEARING

    def distance_to(self, other_creature):
        # Euclidean distance between the two creatures.
        dx = other_creature.x - self.x
        dy = other_creature.y - self.y
        return np.sqrt(dx * dx + dy * dy)

    def unwrap(self):
        # Flatten to the [x, y, bearing] vector used in observations.
        return np.array([self.x, self.y, self.bearing])

    @staticmethod
    def new_random():
        return Creature(np.random.uniform(Creature.MIN_X, Creature.MAX_X),
                        np.random.uniform(Creature.MIN_Y, Creature.MAX_Y),
                        np.random.uniform(Creature.MIN_BEARING, Creature.MAX_BEARING))


class Chaser(Env):

    ACTION_LEFT = 0
    ACTION_RIGHT = 1
    ACTION_NONE = 2
    DELTA_BEARING = 0.12        # radians turned per LEFT/RIGHT action
    AGENT_VELOCITY = 0.009      # distance the agent covers per step
    RABBIT_VELOCITY = 0.003     # distance the rabbit covers per step
    RABBIT_BEARING_DELTA = 0.3  # magnitude of the rabbit's random turns
    CATCH_RADIUS = 0.1          # distance at which the rabbit counts as caught
    MAX_STEPS = 260             # episode length cap
    REWARD_NO_CATCH = -0.1      # per-step penalty while the rabbit is free
    CATCH_REWARD = 30.0         # terminal reward for catching the rabbit

    def __init__(self):
        self.action_space = Discrete(3)
        # Observation: [agent x, y, bearing, rabbit x, y, bearing].
        self.observation_space = Box(
            low=np.array([
                Creature.MIN_X,
                Creature.MIN_Y,
                Creature.MIN_BEARING,
                Creature.MIN_X,
                Creature.MIN_Y,
                Creature.MIN_BEARING]
            ),
            high=np.array([
                Creature.MAX_X,
                Creature.MAX_Y,
                Creature.MAX_BEARING,
                Creature.MAX_X,
                Creature.MAX_Y,
                Creature.MAX_BEARING]
            )
        )
        # Worst case: MAX_STEPS misses; best case: an immediate catch.
        self.reward_range = (Chaser.MAX_STEPS * Chaser.REWARD_NO_CATCH, Chaser.CATCH_REWARD)
        self.state_size = np.prod(self.observation_space.shape)
        self.steps = 0  # also set in reset(); initialized here so step() is safe before it
        self.viewer = None

    def step(self, action):
        assert self.action_space.contains(action)
        self.steps += 1

        def get_agent_bearing_delta():
            if action == Chaser.ACTION_LEFT:
                return -Chaser.DELTA_BEARING
            elif action == Chaser.ACTION_RIGHT:
                return Chaser.DELTA_BEARING
            return 0.0

        # Note: only the rabbit is clipped below; the agent may leave the unit square.
        self.agent.move(Chaser.AGENT_VELOCITY)
        self.agent.turn(get_agent_bearing_delta())

        # The rabbit moves at a fixed speed and turns randomly each step.
        self.rabbit.move(Chaser.RABBIT_VELOCITY)
        # Make sure the rabbit does not escape the screen.
        self.rabbit.x = np.clip(self.rabbit.x, Creature.MIN_X, Creature.MAX_X)
        self.rabbit.y = np.clip(self.rabbit.y, Creature.MIN_Y, Creature.MAX_Y)
        self.rabbit.turn(np.random.choice([-Chaser.RABBIT_BEARING_DELTA, 0.0, Chaser.RABBIT_BEARING_DELTA]))

        reached_goal = self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS
        reward = Chaser.CATCH_REWARD if reached_goal else Chaser.REWARD_NO_CATCH
        done = self.steps >= Chaser.MAX_STEPS or reached_goal
        return self.unwrap_state(), reward, done, {}

    def reset(self):
        self.steps = 0
        # Re-sample both creatures until they start a safe distance apart,
        # so no episode begins with the rabbit already caught.
        self.agent = Creature.new_random()
        self.rabbit = Creature.new_random()
        while self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS * 4.0:
            self.agent = Creature.new_random()
            self.rabbit = Creature.new_random()
        return self.unwrap_state()

    def unwrap_state(self):
        return np.concatenate((self.agent.unwrap(), self.rabbit.unwrap()))

    def render(self, mode='human'):
        screen_width = 400
        screen_height = 400

        world_width = Creature.MAX_X
        scale = screen_width / world_width

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)

            agent = rendering.make_circle(radius=5, res=5, filled=True)
            self.agent_translation = rendering.Transform()
            agent.add_attr(self.agent_translation)

            rabbit = rendering.make_circle(radius=3, res=5, filled=True)
            self.rabbit_translation = rendering.Transform()
            rabbit.add_attr(self.rabbit_translation)

            self.viewer.add_geom(agent)
            self.viewer.add_geom(rabbit)

        self.agent_translation.set_translation(self.agent.x * scale, self.agent.y * scale)
        self.agent_translation.set_rotation(self.agent.bearing)

        self.rabbit_translation.set_translation(self.rabbit.x * scale, self.rabbit.y * scale)
        self.rabbit_translation.set_rotation(self.rabbit.bearing)

        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        # Release the rendering window if one was opened.
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def seed(self, seed=None):
        # The environment draws from the global NumPy RNG, so seeding it
        # makes episodes reproducible. Gym expects the used seeds back.
        np.random.seed(seed)
        return [seed]

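# A hand-written baseline policy (a sketch, not part of the environment
# itself): steer toward the rabbit by comparing the agent's bearing with the
# direction to the rabbit. The action mapping follows the sign convention in
# Chaser.step(), where ACTION_RIGHT adds DELTA_BEARING to the bearing.
# `greedy_action` is an illustrative helper name, not a Gym API; to try it,
# replace env.action_space.sample() with greedy_action(state) below.
def greedy_action(state):
    agent_x, agent_y, agent_bearing = state[0], state[1], state[2]
    rabbit_x, rabbit_y = state[3], state[4]
    # Bearing from agent to rabbit, wrapped to [0, 2*pi).
    target = math.atan2(rabbit_y - agent_y, rabbit_x - agent_x) % (2 * math.pi)
    # Signed angular error in [-pi, pi); positive means turn counterclockwise.
    error = (target - agent_bearing + math.pi) % (2 * math.pi) - math.pi
    if error > Chaser.DELTA_BEARING:
        return Chaser.ACTION_RIGHT
    if error < -Chaser.DELTA_BEARING:
        return Chaser.ACTION_LEFT
    return Chaser.ACTION_NONE
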

if __name__ == '__main__':
    env = Chaser()
    episodes = 10000
    for i in range(episodes):
        done = False
        state = env.reset()
        episode_reward = 0
        while not done:
            env.render()
            next_state, reward, done, _ = env.step(env.action_space.sample())
            state = next_state
            episode_reward += reward
        print('Episode reward %s' % episode_reward)
    env.close()
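
# Optional: register the environment so it can be created via
# gym.make('Chaser-v0'). A minimal sketch, assuming this file is importable
# as the module `chaser`; the id is an arbitrary illustrative choice. Left
# commented out so running this file as a script is unaffected.
#
# from gym.envs.registration import register
#
# register(
#     id='Chaser-v0',
#     entry_point='chaser:Chaser',
#     max_episode_steps=Chaser.MAX_STEPS,
# )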