# imports this snippet needs; env, model, stack_size, state_size,
# action_size, learning_rate, and the helper functions are assumed
# to be defined earlier in the tutorial
from collections import deque
import numpy as np

# replay queue for game memories
memory = deque(maxlen=1000)
# start an empty queue for stacking frames; maxlen matches stack_size
# so the oldest frame drops off whenever a new one is appended
stacked_frames = deque([np.zeros((90, 70), dtype=np.uint8) for _ in range(stack_size)], maxlen=stack_size)
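# stack_frames isn't defined in this paste. here's a minimal sketch of
# what it might look like, assuming each raw frame gets grayscaled and
# downsampled to 90x70 before stacking (the exact preprocessing is a
# guess; a real version might use cv2.resize instead):
def stack_frames(stacked_frames, frame, new_game):
    # crude grayscale + strided downsample to roughly 90x70
    gray = np.mean(frame, axis=2).astype(np.uint8)
    small = gray[::frame.shape[0] // 90 or 1, ::frame.shape[1] // 70 or 1][:90, :70]
    if new_game:
        # fresh episode: fill the whole deque with copies of this frame
        stacked_frames = deque([small for _ in range(stack_size)], maxlen=stack_size)
    else:
        # mid-episode: append and let the deque drop the oldest frame
        stacked_frames.append(small)
    # stack along the last axis so the state has shape (90, 70, stack_size)
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames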
# list for the rewards from each episode
rewards_list = []
# decay_step drives the epsilon decay: the further along we are, the
# less often predict_action picks a random (exploratory) move
decay_step = 0
# how many memories to train on at a time; play around with this value
batch_size = 20
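# predict_action also isn't shown here. a plausible sketch, assuming
# epsilon-greedy exploration with an exponentially decaying epsilon;
# the decay constants are made up, and it reads the global state
# because the call below only passes model and decay_step:
def predict_action(model, decay_step):
    epsilon = 0.01 + (1.0 - 0.01) * np.exp(-0.0001 * decay_step)
    if np.random.rand() < epsilon:
        # explore: pick a random action
        choice = np.random.randint(action_size)
    else:
        # exploit: ask the model for Q-values and pick the best action
        q_values = model.predict(state.reshape(1, *state_size))
        choice = int(np.argmax(q_values))
    # return a one-hot action vector, matching how it's used below
    one_hot = np.zeros(action_size)
    one_hot[choice] = 1
    return one_hot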
# this next part is arbitrary but vitally important: we need to choose
# how many episodes to run. each one starts the usual way, by resetting
# the game environment and zeroing the episode's total_reward. the one
# new wrinkle is the frame stack: we pass the fresh state through
# stack_frames with new_game set to True so the deque is rebuilt from
# scratch
for episode in range(2000):
    state = env.reset()
    total_reward = 0
    state, stacked_frames = stack_frames(stacked_frames, state, True)
    # like previous games, how many moves should we let it make?
    for step in range(1001):
        # if you wish, uncomment the render line to see the game train
        #env.render()
        # increment the decay_step on each move
        decay_step += 1
        # next, we'll predict an action using our pre-fabricated
        # prediction function. since it spits out a one-hot array,
        # and Gym needs an int for an action, we take the argmax of
        # the function's output; that's why the call is wrapped in
        # np.argmax()
        act = np.argmax(predict_action(model, decay_step))
        # now, we'll update the game environment
        obs, reward, done, info = env.step(act)
        # we'll add the reward to our existing total_reward
        total_reward += reward
        # if the game is done, we make sure the system knows it's game
        # over by storing a blank final frame, append the episode's
        # total_reward to rewards_list, and save the transition: the
        # stacked state, the action taken, the reward, the new state
        # (obs), and whether the game finished
        if done:
            obs = np.zeros((210, 160, 3))
            obs, stacked_frames = stack_frames(stacked_frames, obs, False)
            rewards_list.append(total_reward)
            memory.append((state, act, reward, obs, done))
            break
        # if the game isn't done, we still stack the frame and store
        # the transition, but don't append anything to rewards_list
        else:
            obs, stacked_frames = stack_frames(stacked_frames, obs, False)
            memory.append((state, act, reward, obs, done))
            # after all of this, update state to BE the stacked version
            # of obs, and keep on keeping on
            state = obs
        # here's where the learning kicks in. to start, we need to
        # decide when to even begin learning. how many memories is a
        # good starting point? i'm arbitrarily starting at 100
        if len(memory) > 100:
            # pull the useful info out of a batch of memories and
            # organize it before we use each bit to build training data
            batch = sampleMemory(memory, batch_size=batch_size)
            states = np.array([item[0] for item in batch], ndmin=3)
            actions = [item[1] for item in batch]
            rewards = [item[2] for item in batch]
            # note the index: the next state is the fourth element of
            # each memory tuple, not the first
            next_states = np.array([item[3] for item in batch], ndmin=3)
            # build the targets the net should learn for the actions
            # taken in the batch: the immediate reward plus the
            # discounted best Q-value of the next state (learning_rate
            # is standing in for the discount factor gamma here)
            targets = [learning_rate * np.max(item) for item in model.predict(next_states)]
            targets = [targets[i] + rewards[i] for i in range(len(targets))]
            # create the outputs to fit to: start from the model's
            # current predictions and overwrite only the Q-value of
            # the action that was actually taken
            target_f = [item for item in model.predict(states)]
            for i in range(len(target_f)):
                target_f[i][actions[i]] = targets[i]
            # train on the whole batch!
            model.train_on_batch(x=np.array(states).reshape(-1, *state_size),
                                 y=np.array(target_f).reshape(-1, action_size))
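# sampleMemory isn't defined in this paste either. a minimal sketch of
# what it might do, assuming it just draws a uniform random batch of
# transitions from the replay deque:
import random

def sampleMemory(memory, batch_size):
    # sample without replacement; random.sample accepts a deque
    return random.sample(memory, batch_size)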
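# one refinement the loop above skips: for transitions where done is
# True there is no meaningful next state, so the discounted term should
# be dropped from the target. build_targets is a hypothetical helper
# sketching that correction, using the same (state, act, reward, obs,
# done) tuples stored in memory:
def build_targets(batch, q_next, discount):
    # q_next is model.predict(next_states) from the training block above
    targets = []
    for (state, act, reward, next_state, done), q in zip(batch, q_next):
        # no future reward once the episode has ended
        targets.append(reward if done else reward + discount * np.max(q))
    return targets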