Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from gym import Env
- from gym.spaces import Discrete
- from gym.spaces import Box
- import numpy as np
- import math
class Creature:
    """A point entity on the unit square with a position and a heading.

    Coordinates live in [MIN_X, MAX_X] x [MIN_Y, MAX_Y]; ``bearing`` is an
    angle in radians, nominally in [MIN_BEARING, MAX_BEARING).
    """

    MIN_X = 0.0
    MAX_X = 1.0
    MIN_Y = 0.0
    MAX_Y = 1.0
    MIN_BEARING = 0.0
    MAX_BEARING = 2 * math.pi

    def __init__(self, x: float, y: float, bearing: float) -> None:
        self.x = x
        self.y = y
        self.bearing = bearing

    def move(self, velocity: float) -> None:
        """Advance ``velocity`` units along the current bearing."""
        self.x += np.cos(self.bearing) * velocity
        self.y += np.sin(self.bearing) * velocity

    def turn(self, radians: float) -> None:
        """Rotate the bearing by ``radians`` (positive = counter-clockwise)."""
        self.bearing += radians

    def distance_to(self, other_creature: "Creature") -> float:
        """Return the Euclidean distance to ``other_creature``."""
        dx = other_creature.x - self.x
        dy = other_creature.y - self.y
        # np.hypot is the library form of sqrt(dx*dx + dy*dy) and avoids
        # intermediate overflow/underflow for extreme magnitudes.
        return np.hypot(dx, dy)

    def unwrap(self) -> np.ndarray:
        """Flatten the state into ``array([x, y, bearing])``."""
        return np.array([self.x, self.y, self.bearing])

    @staticmethod
    def new_random() -> "Creature":
        """Build a creature with uniform-random position and bearing.

        FIX: decorated with ``@staticmethod`` — the original was a bare
        function stored on the class, which only worked when accessed as
        ``Creature.new_random()`` and would misbind ``self`` as ``x`` if
        ever called on an instance.
        """
        return Creature(np.random.uniform(Creature.MIN_X, Creature.MAX_X),
                        np.random.uniform(Creature.MIN_Y, Creature.MAX_Y),
                        np.random.uniform(Creature.MIN_BEARING, Creature.MAX_BEARING))
class Chaser(Env):
    """Gym environment: steer an agent (left / right / straight) across the
    unit square to catch a randomly wandering rabbit.

    Observation: ``[agent.x, agent.y, agent.bearing, rabbit.x, rabbit.y,
    rabbit.bearing]``.  Reward is ``REWARD_NO_CATCH`` per step until the
    agent comes within ``CATCH_RADIUS`` of the rabbit, which ends the
    episode with ``CATCH_REWARD``; episodes also end after ``MAX_STEPS``.
    """

    ACTION_LEFT = 0
    ACTION_RIGHT = 1
    ACTION_NONE = 2
    DELTA_BEARING = 0.12          # agent turn per step (radians)
    AGENT_VELOCITY = 0.009        # agent distance per step
    RABBIT_VELOCITY = 0.003       # rabbit distance per step
    RABBIT_BEARING_DELTA = 0.3    # magnitude of the rabbit's random turn
    CATCH_RADIUS = 0.1            # catch threshold (distance)
    MAX_STEPS = 260               # episode length cap
    REWARD_NO_CATCH = -0.1        # per-step penalty while rabbit is free
    CATCH_REWARD = 30.0           # terminal reward on a catch

    def __init__(self):
        self.action_space = Discrete(3)
        # Bounds repeat per creature: (x, y, bearing) for agent then rabbit.
        creature_low = [Creature.MIN_X, Creature.MIN_Y, Creature.MIN_BEARING]
        creature_high = [Creature.MAX_X, Creature.MAX_Y, Creature.MAX_BEARING]
        self.observation_space = Box(
            low=np.array(creature_low * 2),
            high=np.array(creature_high * 2),
        )
        self.reward_range = (Chaser.MAX_STEPS * Chaser.REWARD_NO_CATCH, Chaser.CATCH_REWARD)
        self.state_size = np.prod(self.observation_space.shape)
        self.viewer = None

    def step(self, action):
        """Advance one tick; return ``(observation, reward, done, info)``."""
        assert self.action_space.contains(action)
        self.steps += 1

        if action == Chaser.ACTION_LEFT:
            bearing_delta = -Chaser.DELTA_BEARING
        elif action == Chaser.ACTION_RIGHT:
            bearing_delta = Chaser.DELTA_BEARING
        else:
            bearing_delta = 0.0

        # Move with the *current* bearing first, then apply steering
        # (order preserved from the original dynamics).
        self.agent.move(Chaser.AGENT_VELOCITY)
        self.agent.turn(bearing_delta)

        self.rabbit.move(Chaser.RABBIT_VELOCITY)
        self.rabbit.turn(np.random.choice(
            [-Chaser.RABBIT_BEARING_DELTA, 0.0, Chaser.RABBIT_BEARING_DELTA]))

        # FIX: keep the returned state inside the declared observation_space.
        # The original clipped only the rabbit's position, so the agent could
        # leave the unit square and both bearings grew without bound past
        # MAX_BEARING.  Wrapping the bearing mod 2*pi is motion-preserving
        # (cos/sin are periodic); clipping confines creatures to the screen.
        for creature in (self.agent, self.rabbit):
            creature.x = np.clip(creature.x, Creature.MIN_X, Creature.MAX_X)
            creature.y = np.clip(creature.y, Creature.MIN_Y, Creature.MAX_Y)
            creature.bearing %= Creature.MAX_BEARING

        reached_goal = self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS
        reward = Chaser.CATCH_REWARD if reached_goal else Chaser.REWARD_NO_CATCH
        done = self.steps >= Chaser.MAX_STEPS or reached_goal
        return self.unwrap_state(), reward, done, {}

    def reset(self):
        """Start a new episode with the pair spawned a safe distance apart."""
        self.steps = 0
        self.agent = Creature.new_random()
        self.rabbit = Creature.new_random()
        # Re-roll until the rabbit does not start inside (or near) catch range.
        while self.agent.distance_to(self.rabbit) < Chaser.CATCH_RADIUS * 4.0:
            self.agent = Creature.new_random()
            self.rabbit = Creature.new_random()
        return self.unwrap_state()

    def unwrap_state(self):
        """Concatenate agent and rabbit states into one flat observation."""
        return np.concatenate((self.agent.unwrap(), self.rabbit.unwrap()))

    def render(self, mode='human'):
        """Draw both creatures as circles scaled onto a 400x400 window."""
        screen_width = 400
        screen_height = 400
        world_width = Creature.MAX_X
        scale = screen_width / world_width
        if self.viewer is None:
            # Imported lazily so headless training never touches a display.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(screen_width, screen_height)
            agent = rendering.make_circle(radius=5, res=5, filled=True)
            self.agent_translation = rendering.Transform()
            agent.add_attr(self.agent_translation)
            rabbit = rendering.make_circle(radius=3, res=5, filled=True)
            self.rabbit_translation = rendering.Transform()
            rabbit.add_attr(self.rabbit_translation)
            self.viewer.add_geom(agent)
            self.viewer.add_geom(rabbit)
        self.agent_translation.set_translation(self.agent.x * scale, self.agent.y * scale)
        self.agent_translation.set_rotation(self.agent.bearing)
        self.rabbit_translation.set_translation(self.rabbit.x * scale, self.rabbit.y * scale)
        self.rabbit_translation.set_rotation(self.rabbit.bearing)
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        """Release the rendering window.

        FIX: the original was a no-op, leaking the viewer created by render().
        """
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def seed(self, seed=None):
        """Seed the global NumPy RNG this env draws from.

        FIX: the original was a no-op, making episodes non-reproducible even
        when callers seeded the env.  Returns ``[seed]`` per Gym convention.
        """
        np.random.seed(seed)
        return [seed]
if __name__ == '__main__':
    # Smoke-run the environment with a random policy.
    env = Chaser()
    episodes = 10000
    try:
        for _ in range(episodes):
            done = False
            env.reset()
            episode_reward = 0.0
            while not done:
                env.render()
                # Random action; the observation is unused here.
                _, reward, done, _ = env.step(env.action_space.sample())
                episode_reward += reward
            print('Episode reward %s' % episode_reward)
    finally:
        # FIX: the original never closed the env, leaking the render window.
        env.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement