#!/usr/bin/python3

import pygame
import random
import numpy as np
import math
import sys
import cv2

import tensorflow as tf
from tensorflow.keras import layers, models
from collections import deque

# Game environment parameters
render = 0
ACTION_SPACE = 7  # Actions: [NOOP, MOVE_UP, MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, SHOOT_DOWN, SHOOT_UP]

# Initialize Pygame
pygame.init()

# Screen dimensions
WIDTH, HEIGHT = 400, 300
IMG_SHAPE = (84, 84)  # Desired image size for CNN input

# Colors
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)

# Game variables
clock = pygame.time.Clock()
score = 0
font = pygame.font.Font(None, 36)

class FPSGameEnv:
    def __init__(self):
        pygame.init()
        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        pygame.display.set_caption("Simple FPS Game with DQN")
        self.clock = pygame.time.Clock()
        self.reset()

    def reset(self):
        # Reset player and enemies
        self.player_pos = [WIDTH // 2, HEIGHT // 2]
        self.player_speed = 5
        self.player_size = 25
        self.player_health = 5
        self.score = 0
        self.enemies = []
        self.bullets = []
        self.steps = 0

        # Enemy variables
        self.enemy_size = 20
        self.enemy_speed = 2
        self.spawn_rate = 25  # Increase to spawn enemies faster

        # Bullet variables
        self.bullet_size = 5
        self.bullet_speed = 10
        self.bullets = []
        return self.get_state()

    def spawn_enemy(self):
        x = random.randint(0, WIDTH - 40)
        y = random.randint(-100, -40)
        self.enemies.append([x, y])

    def draw_enemies(self):
        for enemy in self.enemies:
            pygame.draw.rect(self.screen, RED, (enemy[0], enemy[1], self.enemy_size, self.enemy_size))

    def handle_bullets(self):
        for bullet in self.bullets[:]:
            bullet[0] += bullet[2]
            bullet[1] += bullet[3]
            # Remove bullet if it goes off-screen
            if not (0 <= bullet[0] <= WIDTH and 0 <= bullet[1] <= HEIGHT):
                self.bullets.remove(bullet)

    def draw_bullets(self):
        for bullet in self.bullets:
            # Cast to int: bullet positions become floats once velocity is applied
            pygame.draw.circle(self.screen, GREEN, (int(bullet[0]), int(bullet[1])), self.bullet_size)

    def update_score(self):
        score_text = font.render(f"Score: {self.score}", True, WHITE)
        health_text = font.render(f"Health: {self.player_health}", True, WHITE)
        self.screen.blit(score_text, (10, 10))
        self.screen.blit(health_text, (10, 40))

    def get_screen_image(self):
        # Capture the game screen as an image and resize to the desired input shape
        # (currently unused; intended as input for the commented-out CNN model)
        screen_data = pygame.surfarray.array3d(self.screen)
        gray_screen = cv2.cvtColor(screen_data, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
        resized_screen = cv2.resize(gray_screen, IMG_SHAPE)  # Resize to IMG_SHAPE
        return np.expand_dims(resized_screen, axis=-1)  # Add channel dimension

    def get_state(self):
        return np.array([self.player_pos[0], self.player_pos[1], self.score, self.player_health])

    def step(self, action):
        # Define actions based on action ID (0 is NOOP)
        if action == 1 and self.player_pos[1] > 0:
            self.player_pos[1] -= 5
        elif action == 2 and self.player_pos[1] < HEIGHT:
            self.player_pos[1] += 5
        elif action == 3 and self.player_pos[0] > 0:
            self.player_pos[0] -= 5
        elif action == 4 and self.player_pos[0] < WIDTH:
            self.player_pos[0] += 5
        elif action == 5:
            # Shoot a bullet toward a random point below the player
            # (clamp so randint gets a valid range even if the player is slightly off-screen)
            target_x = random.randint(0, WIDTH)
            target_y = random.randint(min(self.player_pos[1], HEIGHT), HEIGHT)
            dx, dy = target_x - self.player_pos[0], target_y - self.player_pos[1]
            dist = math.hypot(dx, dy) + 1.e-6  # Avoid division by zero
            dx, dy = dx / dist, dy / dist
            self.bullets.append([self.player_pos[0], self.player_pos[1], dx * self.bullet_speed, dy * self.bullet_speed])
        elif action == 6:
            # Shoot a bullet toward a random point above the player
            target_x = random.randint(0, WIDTH)
            target_y = random.randint(0, max(self.player_pos[1], 0))
            dx, dy = target_x - self.player_pos[0], target_y - self.player_pos[1]
            dist = math.hypot(dx, dy) + 1.e-6  # Avoid division by zero
            dx, dy = dx / dist, dy / dist
            self.bullets.append([self.player_pos[0], self.player_pos[1], dx * self.bullet_speed, dy * self.bullet_speed])

        # Update game elements
        reward, done = self.update_game()
        return self.get_state(), reward, done

    def detect_collision(self, obj1, obj2, size1, size2):
        dx = obj1[0] - obj2[0]
        dy = obj1[1] - obj2[1]
        distance = math.sqrt(dx ** 2 + dy ** 2)
        return distance < (size1 + size2) / 2

    def update_game(self):
        # Spawn enemies occasionally
        if random.randint(1, 100) < 10:
            self.spawn_enemy()

        # Move enemies down (iterate over a copy so removal is safe)
        for enemy in self.enemies[:]:
            enemy[1] += self.enemy_speed
            if self.detect_collision(enemy, self.player_pos, 40, 50):
                self.player_health -= 1
                self.enemies.remove(enemy)
                if self.player_health <= 0:
                    return -100, True  # End game if health is depleted

        # Handle bullets
        self.handle_bullets()

        # Collision detection
        for enemy in self.enemies[:]:
            if self.detect_collision(enemy, self.player_pos, self.enemy_size, self.player_size):
                self.enemies.remove(enemy)
                self.player_health -= 1
                if self.player_health <= 0:
                    print("Game Over")
                    self.score += -100
                    return -100, True  # End game if health is depleted
            else:
                for bullet in self.bullets[:]:
                    if self.detect_collision(bullet, enemy, self.bullet_size, self.enemy_size):
                        self.bullets.remove(bullet)
                        self.enemies.remove(enemy)
                        self.score += 2
                        break

        # Reward for surviving and score increment
        self.score += 1
        return self.score, False

    def close(self):
        pygame.quit()

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        #self.model = self._build_model()
        # Tabular Q-learning parameters (used in place of the commented-out Keras model)
        self.discount_factor = 0.99
        self.exploration_rate = 1.0
        self.exploration_decay = 0.995
        self.qvalues = {}

    def __hash(self, state, action):
        # Hash a state-action pair into a dictionary key
        return hash((state.tobytes(), action))

    def get_q_value(self, state, action):
        # Return the Q-value for a given state-action pair; default to 0 if not present
        state = np.reshape(state, [1, self.state_size])
        #return self.qvalues.get((tuple(state), action), 0.0)
        return self.qvalues.get(self.__hash(state, action), 0.0)

    def update_q_value(self, state, action, reward, next_state):
        # Find the maximum Q-value for the next state
        max_next_q = max([self.get_q_value(next_state, a) for a in range(self.action_size)], default=0)
        # Q-learning formula to update Q-value of current state-action pair
        current_q = self.get_q_value(state, action)
        new_q = current_q + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q)
        # Update Q-value in the dictionary
        #self.qvalues[(tuple(state), action)] = new_q
        self.qvalues[self.__hash(state, action)] = new_q

    def get_best_action(self, state):
        # Retrieve the action with the highest Q-value for the current state
        q_values = [self.get_q_value(state, action) for action in range(self.action_size)]
        max_q = max(q_values)
        # Return the action with the maximum Q-value (break ties randomly)
        best_actions = [action for action, q in enumerate(q_values) if q == max_q]
        return random.choice(best_actions)

    def choose_action(self, state):
        # Epsilon-greedy policy to select action
        if random.random() < self.exploration_rate:
            return random.randint(0, self.action_size - 1)  # Random action
        else:
            return self.get_best_action(state)

    def decay_exploration(self):
        # Decay the exploration rate
        self.exploration_rate = max(0.01, self.exploration_rate * self.exploration_decay)

    #def _build_model(self):
    #    # Create a neural network using TensorFlow and Keras
    #    model = models.Sequential([
    #        layers.Dense(24, input_dim=self.state_size, activation='relu'),
    #        layers.Dense(24, activation='relu'),
    #        layers.Dense(self.action_size, activation='linear')
    #    ])
    #    #model = keras.Sequential(
    #    #    [
    #    #        keras.Input(shape=(WIDTH, HEIGHT, 3)),
    #    #        layers.Conv2D(32, 5, strides=2, activation="relu"),
    #    #        layers.Conv2D(32, 3, activation="relu"),
    #    #        layers.Dense(self.action_size, activation='linear')
    #    #    ]
    #    #)
    #    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
    #                  loss='mse')
    #    model.summary()
    #    return model

    def act(self, state):
        state = np.reshape(state, [1, self.state_size])
        # Epsilon-greedy action selection
        return self.choose_action(state)

    def remember(self, state, action, reward, next_state, done):
        # Store experience in memory
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # Train the network using experience replay
        # (only meaningful once the Keras model above is re-enabled; the model-dependent
        # lines stay commented out so this method does not crash if called)
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = np.reshape(next_state, [1, self.state_size])
                #target = (reward + self.gamma *
                #          np.amax(self.model.predict(next_state)[0]))
            #target_f = self.model.predict(np.reshape(state, [1, self.state_size]))
            #target_f[0][action] = target
            #self.model.fit(np.reshape(state, [1, self.state_size]), target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Instantiate the environment and agent
env = FPSGameEnv()
agent = DQNAgent(state_size=4, action_size=ACTION_SPACE)
batch_size = 32
episodes = 5000

for e in range(episodes):
    env.screen.fill(BLACK)

    state = env.reset()
    state = np.reshape(state, [1, 4])
    agent.decay_exploration()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        next_state = np.reshape(next_state, [1, 4])
        agent.update_q_value(state, action, reward, next_state)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            # Report the tabular exploration rate, which is the value that actually decays here
            print(f"episode: {e}/{episodes}, score: {env.score}, e: {agent.exploration_rate:.2}")
            break
        #if len(agent.memory) > batch_size:
        #    agent.replay(batch_size)

        # Draw everything (per step, only when rendering is enabled)
        if render:
            pygame.event.pump()  # Keep the window responsive while training
            env.screen.fill(BLACK)
            pygame.draw.rect(env.screen, WHITE, (env.player_pos[0] - env.player_size // 2, env.player_pos[1] - env.player_size // 2, env.player_size, env.player_size))
            env.draw_enemies()
            env.draw_bullets()
            env.update_score()

            pygame.display.flip()
            #clock.tick(30)


env.close()
#print(agent.qvalues)
#agent.model.save("fps_game.h5")