import os
import math
import random
import time
from collections import deque

import numpy as np
import pygame
from PIL import Image
from tqdm import tqdm

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Activation, Flatten
from keras.optimizers import Adam

# DQN / environment settings
SIZE = 500                      # window size in pixels
MODEL_NAME = "256x2"
REPLAY_MEMORY_SIZE = 50_000     # how many recent transitions to keep for training
MIN_REPLAY_MEMORY_SIZE = 1_000  # minimum number of transitions before training starts
MINIBATCH_SIZE = 64             # transitions sampled per training step
DISCOUNT = 0.95                 # gamma in the Bellman update
UPDATE_TARGET_EVERY = 5         # terminal states between target-model syncs
MIN_REWARD = -200               # minimum recent episode reward required to save the model
MEMORY_FRACTION = 0.20
EPISODES = 10_000

# Exploration settings
epsilon = 0.4198  # not a constant, going to be decayed
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

# Stats settings
AGGREGATE_STATS_EVERY = 50  # episodes
SHOW_PREVIEW = True

# Buffers used by the (disabled) offline-training dump in DQNAgent.train()
x_train = []
y_train = []

run_options = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom=True)


def iscollision(x_apple, y_apple, x, y):
    # Despite the name, this returns the Euclidean distance between the two
    # points; callers compare it against a threshold to detect collisions.
    return math.hypot(x_apple - x, y_apple - y)


class Blob:
    # The player square
    def __init__(self):
        self.x = 80
        self.y = 300
        self.width = 20
        self.height = 20

    def collision(self, other):
        return self.x == other.x and self.y == other.y

    def move(self, select):
        # 1 = left, 2 = right, 3 = down, 4 = up; any other value is a no-op
        if select == 1:
            self.x -= 1
        if select == 2:
            self.x += 1
        if select == 3:
            self.y += 1
        if select == 4:
            self.y -= 1


class BlobFood:
    # Fixed spawn positions of the eight food pellets
    POSITIONS = [(80, 350), (80, 120), (80, 220), (250, 115),
                 (400, 350), (400, 120), (400, 220), (250, 360)]

    def __init__(self):
        self.x = [px for px, _ in self.POSITIONS]
        self.y = [py for _, py in self.POSITIONS]
        self.width = 20
        self.height = 20

    def collision(self, other):
        return self.x == other.x and self.y == other.y

    def move(self, select):
        # x and y are lists, so shift every pellet by the same offset
        dx = {1: -0.1, 2: 0.1}.get(select, 0)
        dy = {3: 0.1, 4: -0.1}.get(select, 0)
        self.x = [px + dx for px in self.x]
        self.y = [py + dy for py in self.y]


class Walls:
    # Each segment is (x, y, width, height); a width or height of 1 marks the
    # thin axis of the wall. Order matters: BlobEnv.get_image() shifts
    # segments by index (5-7, 8-11, 14-15 and 16-18 get a 15 px offset).
    SEGMENTS = [
        # outer boundary
        (50, 100, 1, 100),    # 0
        (50, 100, 100, 1),    # 1
        (150, 100, 100, 1),   # 2
        (250, 100, 100, 1),   # 3
        (350, 100, 100, 1),   # 4
        (435, 100, 1, 100),   # 5
        (435, 200, 1, 100),   # 6
        (435, 300, 1, 100),   # 7
        (350, 385, 100, 1),   # 8
        (250, 385, 100, 1),   # 9
        (150, 385, 100, 1),   # 10
        (50, 385, 100, 1),    # 11
        (50, 300, 1, 100),    # 12
        (50, 200, 1, 100),    # 13
        # inner walls
        (130, 170, 1, 80),    # 14
        (130, 230, 1, 80),    # 15
        (145, 155, 80, 1),    # 16
        (210, 155, 80, 1),    # 17
        (270, 155, 80, 1),    # 18
        (350, 170, 1, 80),    # 19
        (350, 230, 1, 80),    # 20
        (145, 310, 80, 1),    # 21
        (225, 310, 80, 1),    # 22
        (270, 310, 80, 1),    # 23
    ]

    def __init__(self):
        self.x = [s[0] for s in self.SEGMENTS]
        self.y = [s[1] for s in self.SEGMENTS]
        self.width = [s[2] for s in self.SEGMENTS]
        self.height = [s[3] for s in self.SEGMENTS]


class BlobEnv:
    SIZE = 500
    RETURN_IMAGES = True
    MOVE_PENALTY = 10
    ENEMY_PENALTY = 300
    FOOD_REWARD = 100
    OBSERVATION_SPACE_VALUES = (10, 10, 3)  # observations are downsampled to 10x10 RGB
    ACTION_SPACE_SIZE = 4                   # matches the four Q-values the model outputs
    PLAYER_N = 1  # player key in dict
    FOOD_N = 2    # food key in dict
    ENEMY_N = 3   # enemy (wall) key in dict
    # the dict! (colors)
    d = {1: (255, 175, 0),
         2: (0, 255, 0),
         3: (0, 0, 255)}

    def __init__(self):
        self.win = pygame.display.set_mode((SIZE, SIZE))
        self.wall = Walls()
        self.player = Blob()
        self.food = BlobFood()
        self.MOVE_PENALTY = 10
        self.ENEMY_PENALTY = 300
        self.FOOD_REWARD = 100
        self.size = 1000
        self.i = 0

    def reset(self):
        pygame.init()
        self.win = pygame.display.set_mode((SIZE, SIZE))
        self.wall = Walls()
        self.player = Blob()
        self.food = BlobFood()
        # reset() uses different penalty/reward values than __init__
        self.MOVE_PENALTY = 5
        self.ENEMY_PENALTY = 100
        self.FOOD_REWARD = 40
        self.i = 0
        path = "images/screenshot.jpeg"
        pygame.image.save(self.win, path)
        # Downsample the screenshot to the 10x10x3 observation the network expects
        observation = np.array(Image.open(path).resize((10, 10)))
        return observation

    def get_all(self, done, choice):
        # Network actions 0-3 map to moves 1-4 (left, right, down, up);
        # apply the action first, then observe the resulting frame
        self.player.move(choice + 1)
        new_observation = np.array(self.get_image().resize((10, 10)))

        reward = -self.MOVE_PENALTY  # default: every step costs a move penalty

        # Wall collisions end the episode with a penalty
        for c in range(len(self.wall.x)):
            if self.wall.height[c] > self.wall.width[c]:
                for i in range(self.wall.height[c]):
                    distance = iscollision(self.wall.x[c], self.wall.y[c] + (i + 5),
                                           self.player.x, self.player.y)
                    if distance < 3:
                        reward = -self.ENEMY_PENALTY
                        done = True
            elif self.wall.height[c] < self.wall.width[c]:
                for i in range(self.wall.width[c]):
                    distance = iscollision(self.wall.x[c] + (i + 5), self.wall.y[c],
                                           self.player.x, self.player.y)
                    if distance < 3:
                        reward = -self.ENEMY_PENALTY
                        done = True

        # Eating a pellet gives a reward; eating all eight ends the episode
        for d in range(len(self.food.x)):
            distance = iscollision(self.food.x[d], self.food.y[d], self.player.x, self.player.y)
            if distance < 20:
                reward = self.FOOD_REWARD
                self.i += 1
                if self.i == 8:
                    done = True
                    self.i = 0
                # Move the eaten pellet far off-screen so it cannot be eaten twice
                self.food.x[d] = 20000
                self.food.y[d] = 20000

        return new_observation, reward, done

    def render(self):
        self.get_image()
        pygame.display.update()

    def get_image(self):
        self.win.fill((0, 0, 0))
        pygame.draw.rect(self.win, (255, 0, 0),
                         (self.player.x, self.player.y, self.player.width, self.player.height))
        for b in range(len(self.food.x)):
            pygame.draw.rect(self.win, (255, 0, 0),
                             (self.food.x[b], self.food.y[b], self.food.width, self.food.height))
        for i in range(len(self.wall.x)):
            if 5 <= i <= 7 or 14 <= i <= 15:
                # these segments are drawn 15 px further right
                pygame.draw.rect(self.win, (255, 0, 0),
                                 (self.wall.x[i] + 15, self.wall.y[i], self.wall.width[i], self.wall.height[i]))
            elif 8 <= i <= 11 or 16 <= i <= 18:
                # these segments are drawn 15 px further down
                pygame.draw.rect(self.win, (255, 0, 0),
                                 (self.wall.x[i], self.wall.y[i] + 15, self.wall.width[i], self.wall.height[i]))
            else:
                pygame.draw.rect(self.win, (255, 0, 0),
                                 (self.wall.x[i], self.wall.y[i], self.wall.width[i], self.wall.height[i]))

        pygame.display.update()

        # Snapshot the window to disk and return it as a PIL image
        path = "images/screenshot1.jpeg"
        pygame.image.save(self.win, path)
        return Image.open(path)

    def key_movement(self, size):
        key = pygame.key.get_pressed()

        if key[pygame.K_w]:
            self.player.move(4)
        if key[pygame.K_a]:
            self.player.move(1)
        if key[pygame.K_s]:
            self.player.move(3)
        if key[pygame.K_d]:
            self.player.move(2)


# Initialise pygame before the environment creates its window
pygame.init()
env = BlobEnv()

# For stats
ep_rewards = [-200]

# For more repetitive results
random.seed(1)
np.random.seed(1)
tf.compat.v1.set_random_seed(1)

# Create output folders for saved models and screenshot frames
if not os.path.isdir('models'):
    os.makedirs('models')
if not os.path.isdir('images'):
    os.makedirs('images')


# Own TensorBoard class
class ModifiedTensorBoard(tf.compat.v1.keras.callbacks.TensorBoard):

    # Overriding init to set the initial step and writer (we want one log file for all .fit() calls)
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.step = 1
        self.writer = tf.summary.create_file_writer(self.log_dir)

    def _write_logs(self, logs, index):
        with self.writer.as_default():
            for name, value in logs.items():
                tf.summary.scalar(name, value, step=index)
                self.step += 1
                self.writer.flush()

    # Overriding this method to stop creating a default log writer
    def set_model(self, model):
        pass

    # Overridden: saves logs with our step number
    # (otherwise every .fit() would start writing from the 0th step)
    def on_epoch_end(self, epoch, logs=None):
        self.update_stats(**logs)

    # Overridden: we train for one batch only, so there is nothing to save at batch end
    def on_batch_end(self, batch, logs=None):
        pass

    # Overridden so the writer is not closed after each .fit() call
    def on_train_end(self, _):
        pass

    # Custom method for saving own metrics:
    # writes the given stats to the persistent writer
    def update_stats(self, **stats):
        self._write_logs(stats, self.step)


class DQNAgent:
    run_opts = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom=True)

    def __init__(self):
        # Main model: trained every step
        self.model = self.create_model()

        # Target model: used for the (more stable) future-Q predictions
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}", profile_batch=0)

        self.target_update_counter = 0
        self.run_opts = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom=True)
        self.i = 0
        self.b = 0

    def create_model(self):
        model = Sequential()
        model.add(Conv2D(64, (3, 3), input_shape=(10, 10, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(Conv2D(160, (3, 3)))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

        model.add(Flatten())  # converts the 3D feature maps to 1D feature vectors

        model.add(Dense(41))
        model.add(Dense(51))

        # One linear Q-value output per action (left, right, down, up)
        model.add(Dense(4, activation="linear"))

        model.compile(loss="mse",
                      optimizer=Adam(lr=0.001),
                      metrics=['accuracy'])
        return model

    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)

    def get_qs(self, state):
        # Predict Q-values for a single state (scaled to [0, 1], with a batch axis added)
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]

    def save_model(self):
        self.model.save(f"models/256x10x64x4-{int(time.time())}")

    def train(self, terminal_state, step, x_train_list, y_train_list):
        # Only train once enough transitions have been collected
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # Q-values for the sampled current states (from the main model)
        current_states = np.array([transition[0] for transition in minibatch]) / 255
        current_qs_list = self.model.predict(current_states)

        # Q-values for the resulting states (from the target model)
        new_current_states = np.array([transition[3] for transition in minibatch]) / 255
        future_qs_list = self.target_model.predict(new_current_states)

        x = []
        y = []

        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            self.i += 1
            if not done:
                # Bellman update: r + gamma * max_a' Q_target(s', a')
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            # Update the Q-value only for the action that was actually taken
            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            x.append(current_state)
            y.append(current_qs)

        # Optional (disabled): accumulate up to 60_000 (state, target) pairs in
        # x_train_list / y_train_list and pickle them to x.pickle / y.pickle
        # (or x_test.pickle / y_test.pickle on the second pass) for offline training.

        x = np.array(x) / 255
        y = np.array(y)
        self.model.fit(x, y, epochs=2, batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False,
                       callbacks=[self.tensorboard] if terminal_state else None)

        # Count terminal states and periodically sync the target model with the main model
        if terminal_state:
            self.target_update_counter += 1

        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0


agent = DQNAgent()

for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episode"):

    agent.tensorboard.step = episode

    episode_reward = 0
    step = 1
    current_state = env.reset()

    done = False
    # Periodically snapshot the model to disk (every 10 episodes)
    if SHOW_PREVIEW and not episode % 10:
        agent.save_model()

    while not done:
        # Handle the window's close button so the episode can be aborted
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                done = True

        # Epsilon-greedy action selection
        if np.random.random() > epsilon:
            action = np.argmax(agent.get_qs(current_state))
        else:
            action = np.random.randint(0, env.ACTION_SPACE_SIZE)

        new_state, reward, done = env.get_all(done, action)
        os.remove("images/screenshot1.jpeg")
        episode_reward += reward
        if reward == env.FOOD_REWARD:
            agent.save_model()

        # Optional (disabled): abort the episode early if the observation stops
        # changing for many consecutive steps, and render a preview every few
        # episodes via env.render().

        agent.update_replay_memory((current_state, action, reward, new_state, done))
        agent.train(done, step, x_train, y_train)

        current_state = new_state
        step += 1

    # Append the episode reward and log stats every AGGREGATE_STATS_EVERY episodes
    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:]) / len(ep_rewards[-AGGREGATE_STATS_EVERY:])
        min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
        max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward,
                                       epsilon=epsilon)

        # Save the model, but only when the recent minimum reward is at least MIN_REWARD
        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}')

    # Decay epsilon
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)

    # env.reset() re-initialises pygame at the start of the next episode
    pygame.quit()
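

# --- Optional evaluation sketch ----------------------------------------------
# A minimal sketch of how one of the models saved above could be loaded and
# played back greedily. keras.models.load_model is the standard Keras loader;
# the helper name play_greedy_episode and the exact saved-model path are
# illustrative assumptions, so point it at whatever agent.save_model() or
# agent.model.save() actually produced.
def play_greedy_episode(model_path):
    from keras.models import load_model

    model = load_model(model_path)  # e.g. "models/256x10x64x4-<timestamp>"
    state = env.reset()
    done = False
    total_reward = 0
    while not done:
        # Same preprocessing as DQNAgent.get_qs: scale to [0, 1] and add a batch axis
        qs = model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]
        action = np.argmax(qs)  # greedy: no epsilon exploration
        state, reward, done = env.get_all(done, action)
        total_reward += reward
    return total_reward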