Advertisement
Guest User

Untitled

a guest
Sep 26th, 2017
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.13 KB | None | 0 0
  1. from tqdm import tqdm_notebook, tqdm
  2. class StepsProgressBarNotebook(object):
  3. """
  4. HTML5 logger for tensorforce using tqdm_notebook for jupyter-notebook.
  5.  
  6. Usage:
  7. `runner.run(episodes=np.inf, episode_finished=StepsProgressBar(steps=1e9, print_every=1000))`
  8. """
  9.  
  10. def __init__(self, steps, print_every=None, mean_of=100):
  11. """
  12. steps - stop after this many total steps
  13. print_every - print the mean metrics every log_intv episodes
  14. mean_of - progress bar shows the reward as mean of the last n episodes
  15. """
  16. self.print_every = print_every
  17. self.mean_of = mean_of
  18. self.steps = steps
  19. self.progbar = tqdm_notebook(desc='',
  20. total=steps,
  21. leave=True,
  22. # mininterval=1,
  23. unit='steps'
  24.  
  25. )
  26.  
  27. def __call__(self, r):
  28. desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
  29. reward=np.mean(r.episode_rewards[-self.mean_of:]),
  30. rewards_min=np.min(r.episode_rewards[-self.mean_of:]),
  31. rewards_max=np.max(r.episode_rewards[-self.mean_of:]),
  32. episodes=r.episode,
  33. )
  34. self.progbar.desc = desc
  35. self.progbar.update(r.episode_lengths[-1])
  36.  
  37. if self.print_every and r.total_timesteps%self.print_every==0:
  38. print(self.progbar.desc)
  39.  
  40. return r.total_timesteps<self.steps
  41.  
  42.  
  43. class StepsProgressBar(object):
  44. """
  45. Progress bar for tensorforce that stop after N steps.
  46.  
  47. Usage:
  48. `runner.run(episodes=np.inf, episode_finished=StepsProgressBar(steps=1e9, print_every=1000))`
  49. """
  50.  
  51. def __init__(self, steps, print_every=None, mean_of=100):
  52. """
  53. steps - stop after this many total steps
  54. print_every - print the mean metrics every log_intv episodes
  55. mean_of - progress bar shows the reward as mean of the last n episodes
  56. """
  57. self.print_every = print_every
  58. self.mean_of = mean_of
  59. self.steps = steps
  60. self.progbar = tqdm(desc='',
  61. total=steps,
  62. leave=True,
  63. # mininterval=1,
  64. unit='steps'
  65.  
  66. )
  67.  
  68. def __call__(self, r):
  69. desc = "reward: {reward: 2.4f} [{rewards_min: 2.0f}, {rewards_max: 2.0f}] episodes: {episodes}".format(
  70. reward=np.mean(r.episode_rewards[-self.mean_of:]),
  71. rewards_min=np.min(r.episode_rewards[-self.mean_of:]),
  72. rewards_max=np.max(r.episode_rewards[-self.mean_of:]),
  73. episodes=r.episode,
  74. )
  75. self.progbar.desc = desc
  76. self.progbar.update(r.episode_lengths[-1])
  77.  
  78. if self.print_every and r.total_timesteps%self.print_every==0:
  79. print(self.progbar.desc)
  80.  
  81. return r.total_timesteps<self.steps
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement