Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Standard library
from random import randint

# Third-party
import numpy as np
from tensorforce.agents import PPOAgent
from tensorforce.environments import Environment
from tensorforce.execution import Runner
class SimpleEnvironment(Environment):
    """Toy tensorforce environment: the state is always [2, 2] and the
    reward penalizes the magnitude of the agent's action.

    Episodes terminate every 30 steps (see ``execute``).
    """

    def __init__(self):
        # Step counter; drives episode termination in execute().
        self.counter = 0

    @property
    def states(self):
        # State spec consumed by the agent: a flat vector of two floats.
        # Fixed: original used Python 2 `print "..."` (SyntaxError in
        # Python 3, and inconsistent with print(...) used elsewhere).
        print("getting states")
        return dict(type='float', shape=(2,))

    @property
    def actions(self):
        # Action spec: a single continuous value.
        return dict(type='float', shape=1)

    def execute(self, actions):
        """Advance one timestep.

        Returns (next_state, done, reward): reward is the negative
        Euclidean norm of the action, and done becomes True every
        30th step.
        """
        self.counter += 1
        reward = -np.linalg.norm(actions)
        done = (self.counter % 30 == 0)
        return np.array([2, 2]), done, reward

    def reset(self):
        # Every episode starts from the same constant state.
        return np.array([2, 2])
if __name__ == "__main__":
    # Build the environment and a PPO agent with a small two-layer
    # dense network driven by Adam.
    environment = SimpleEnvironment()
    agent = PPOAgent(
        states=environment.states,
        actions=environment.actions,
        network=[
            dict(type='dense', size=2),
            dict(type='dense', size=2),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-3),
    )

    runner = Runner(agent=agent, environment=environment)

    def episode_finished(r):
        # Per-episode progress callback; returning True lets the
        # runner continue training.
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep,
                                                                                     reward=r.episode_rewards[-1]))
        return True

    # Start learning
    runner.run(episodes=300000, max_episode_timesteps=300,
               episode_finished=episode_finished, deterministic=False)
    runner.close()
Add Comment
Please, Sign In to add comment