Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
from tqdm import tqdm_notebook, tqdm
class StepsProgressBarNotebook(object):
    """
    HTML5 logger for tensorforce using tqdm_notebook for jupyter-notebook.

    An instance is a callable meant to be handed to the runner as its
    `episode_finished` callback. Each call advances the bar by the last entry
    of the runner's `episode_lengths` and returns False once the step budget
    is spent, which asks the runner to stop.

    Usage:
    `runner.run(episodes=np.inf, episode_finished=StepsProgressBarNotebook(steps=1e9, print_every=1000))`
    """

    def __init__(self, steps, print_every=None, mean_of=100):
        """
        steps - stop after this many total steps
        print_every - print the bar description every `print_every` episodes
            (None or 0 disables printing)
        mean_of - the description shows reward statistics (mean, min, max)
            over the last `mean_of` episodes
        """
        self.print_every = print_every
        self.mean_of = mean_of
        self.steps = steps
        self.progbar = tqdm_notebook(
            desc='',
            total=steps,
            leave=True,
            unit='steps',
        )

    def __call__(self, r):
        """
        Refresh the bar from the runner `r` and report whether to keep running.

        `r` must expose `episode_rewards`, `episode_lengths`, `episode` and
        `total_timesteps`. Returns True while `r.total_timesteps` is below
        `steps`, False otherwise.
        """
        # Slice the reward window once instead of once per statistic.
        recent_rewards = r.episode_rewards[-self.mean_of:]
        desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
            reward=np.mean(recent_rewards),
            rewards_min=np.min(recent_rewards),
            rewards_max=np.max(recent_rewards),
            episodes=r.episode,
        )
        self.progbar.desc = desc
        self.progbar.update(r.episode_lengths[-1])

        # This callback only fires at episode boundaries, so a modulo on the
        # running step total would almost never hit zero; count episodes, as
        # the `print_every` documentation promises.
        if self.print_every and r.episode % self.print_every == 0:
            print(self.progbar.desc)

        keep_running = r.total_timesteps < self.steps
        if not keep_running:
            # Finalise the bar so it is not left dangling once the run stops.
            self.progbar.close()
        return keep_running
class StepsProgressBar(object):
    """
    Progress bar for tensorforce that stops after N steps.

    An instance is a callable meant to be handed to the runner as its
    `episode_finished` callback. Each call advances the bar by the last entry
    of the runner's `episode_lengths` and returns False once the step budget
    is spent, which asks the runner to stop.

    Usage:
    `runner.run(episodes=np.inf, episode_finished=StepsProgressBar(steps=1e9, print_every=1000))`
    """

    def __init__(self, steps, print_every=None, mean_of=100):
        """
        steps - stop after this many total steps
        print_every - print the bar description every `print_every` episodes
            (None or 0 disables printing)
        mean_of - the description shows reward statistics (mean, min, max)
            over the last `mean_of` episodes
        """
        self.print_every = print_every
        self.mean_of = mean_of
        self.steps = steps
        self.progbar = tqdm(
            desc='',
            total=steps,
            leave=True,
            unit='steps',
        )

    def __call__(self, r):
        """
        Refresh the bar from the runner `r` and report whether to keep running.

        `r` must expose `episode_rewards`, `episode_lengths`, `episode` and
        `total_timesteps`. Returns True while `r.total_timesteps` is below
        `steps`, False otherwise.
        """
        # Slice the reward window once instead of once per statistic.
        recent_rewards = r.episode_rewards[-self.mean_of:]
        desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
            reward=np.mean(recent_rewards),
            rewards_min=np.min(recent_rewards),
            rewards_max=np.max(recent_rewards),
            episodes=r.episode,
        )
        self.progbar.desc = desc
        self.progbar.update(r.episode_lengths[-1])

        # This callback only fires at episode boundaries, so a modulo on the
        # running step total would almost never hit zero; count episodes, as
        # the `print_every` documentation promises.
        if self.print_every and r.episode % self.print_every == 0:
            print(self.progbar.desc)

        keep_running = r.total_timesteps < self.steps
        if not keep_running:
            # Finalise the bar so it is not left dangling once the run stops.
            self.progbar.close()
        return keep_running
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement