Not a member of Pastebin yet? Sign up — it unlocks many cool features!
def train(model, epochs):
    """Train `model` for `epochs` episodes of epsilon-greedy Q-learning.

    Each episode resets the global environment, plays until `game_over`,
    stores each transition in the global experience-replay buffer, and fits
    the model on a sampled batch every step.

    NOTE(review): relies on module-level globals not defined in this block:
    `env`, `epsilon`, `num_actions`, `exp_replay`, `batch_size`
    (plus `np` and `time` imports) — confirm they exist at call time.

    Parameters
    ----------
    model : object
        Model exposing `.predict(state)` and `.train_on_batch(inputs, targets)`
        (Keras-style interface).
    epochs : int
        Number of episodes to play.

    Returns
    -------
    tuple
        (entire_hist, total) where `entire_hist` is a per-epoch list of
        per-step `[loss, accuracy, env.main]` records and `total` is the
        environment's total profit after the final epoch.
    """
    total = 0
    start = time.time()
    entire_hist = []
    profits = []
    for i in range(epochs):
        loss = 0
        accuracy = 0
        hist = []
        env.reset()
        game_over = False
        input_t = env.observe()
        while not game_over:
            input_tm1 = input_t
            # Epsilon-greedy action selection: explore with probability
            # `epsilon`, otherwise exploit the model's Q-value estimates.
            if np.random.random() <= epsilon:
                action = np.random.randint(0, num_actions, size=None)
            else:
                q = model.predict(input_tm1)
                action = np.argmax(q[0])
            input_t, reward, game_over = env.act(action)
            # Store the transition, then learn from a replayed mini-batch.
            exp_replay.remember([input_tm1, action, reward, input_t], game_over)
            inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
            loss, accuracy = model.train_on_batch(inputs, targets)
            hist.append([loss, accuracy, env.main])
            print(f'counter: {env.counter}, action taken: {action}, reward: {round(reward, 2)}, main: {round(env.main)}, secondary: {env.secondary}')
            if game_over:
                print('GAME OVER!')
        entire_hist.append(hist)
        # BUG FIX: `total` was never updated, so `profits` only ever held
        # zeros and the function returned 0. Track the environment's running
        # profit (the value printed below) instead.
        total = env.total_profit
        profits.append(total)
        print(f'total profit: {env.total_profit}')
        print(f'epoch: {i}, loss: {loss}, accuracy: {accuracy}')
        # BUG FIX: print('n') printed a literal "n"; a blank line was intended.
        print('\n')
        print('*' * 20)
    end = int(time.time() - start)
    print(f'training time: {end} seconds')
    return entire_hist, total
Add Comment
Please sign in to add a comment.