Advertisement
Guest User

Untitled

a guest
Apr 24th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.73 KB | None | 0 0
  1. from gym.core import Wrapper
  2. from pickle import dumps,loads
  3. from collections import namedtuple
  4.  
  5. #a container for get_result function. Works just like tuple, but prettier
  6. ActionResult = namedtuple("action_result",("snapshot","observation","reward","is_done","info"))
  7.  
  8.  
  9. class WithSnapshots(Wrapper):
  10.    
  11.     def __init__(self,env):      
  12.         """
  13.        Creates a wrapper that supports saving and loading environemnt states.
  14.        Required for planning algorithms.
  15.        
  16.        This class will have access to the core environment as self.env, e.g.:
  17.        - self.env.reset()           #reset original env
  18.        - self.env.ale.cloneState()  #make snapshot for atari. load with .restoreState()
  19.        - ...
  20.        
  21.        You can also use reset, step and render directly for convenience.
  22.        - s, r, _, _ = self.step(action)   #step, same as self.env.step(action)
  23.        - self.render(close=True)          #close window, same as self.env.render(close=True)
  24.        
  25.        """
  26.  
  27.         Wrapper.__init__(self,env)
  28.        
  29.     def get_snapshot(self):
  30.         """
  31.        :returns: environment state that can be loaded with load_snapshot
  32.        Snapshots guarantee same env behaviour each time they are loaded.
  33.        
  34.        Warning! Snapshots can be arbitrary things (strings, integers, json, tuples)
  35.        Don't count on them being pickle strings when implementing MCTS.
  36.        
  37.        Developer Note: Make sure the object you return will not be affected by
  38.        anything that happens to the environment after it's saved.
  39.        You shouldn't, for example, return self.env.
  40.        In case of doubt, use pickle.dumps or deepcopy.
  41.        
  42.        """
  43.         self.render(close=True) #close popup windows since we can't pickle them
  44.         return dumps(self.env)
  45.    
  46.     def load_snapshot(self,snapshot):
  47.         """
  48.        Loads snapshot as current env state.
  49.        Should not change snapshot inplace (in case of doubt, deepcopy).
  50.        """
  51.        
  52.         assert not hasattr(self,"_monitor") or hasattr(self.env,"_monitor"), "can't backtrack while recording"
  53.  
  54.         self.render(close=True) #close popup windows since we can't load into them
  55.         self.env = loads(snapshot)
  56.    
  57.     def get_result(self,snapshot,action):
  58.         """
  59.        A convenience function that
  60.        - loads snapshot,
  61.        - commits action via self.step,
  62.        - and takes snapshot again :)
  63.        
  64.        :returns: next snapshot, next_observation, reward, is_done, info
  65.        
  66.        Basically it returns next snapshot and everything that env.step would have returned.
  67.        """
  68.        
  69.         <your code here>
  70.        
  71.         return ActionResult(new_snapshot,obs,r,done,info)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement