***Labyrinth.py***

import random
from math import sqrt


# Symbols used on the field
red = "\033[31m"
reset = "\033[0m"
agent = "{}V{}".format(red, reset)
# agent = "X"
free_way = "."
passed_way = "*"
wall = "#"
barrier = "&"
exit_ = "0"


class LabyrinthGame:
    # Reward values
    reward_empty = 5
    reward_visited = -1
    reward_obstacle = -10
    reward_complete = 1000
    reward_incomplete = -1000
    reward_exit = 7

    def __init__(self, field: list[list[str]]):
        self.field: list[list[str]] = field
        self.player_row: int = 1
        self.player_col: int = 4
        self.count_of_point: int = 0
        self.complete: bool = False
        self.distance_to_exit: float | None = None
        self.exit_row: int = 0
        self.exit_col: int = 4

    # Place the exit, the agent and random obstacles on the field.
    def create_field(self):
        self.field[self.exit_row][self.exit_col] = exit_
        self.field[self.player_row][self.player_col] = agent
        for _ in range(10):
            block_row = random.randint(1, 8)
            block_col = random.randint(1, 8)
            if self.field[block_row][block_col] == free_way:
                self.field[block_row][block_col] = barrier

    # Print the game field to the screen.
    def print_field(self):
        for row in self.field:
            print(" ".join(row))

    def get_field(self):
        return self.field

    def set_count_of_point(self, reward: int) -> None:
        self.count_of_point += reward

    # Compute the player's new position for the chosen action.
    def get_new_position(self, action: str) -> list:
        if action == "w":
            return [self.player_row - 1, self.player_col]
        elif action == "s":
            return [self.player_row + 1, self.player_col]
        elif action == "a":
            return [self.player_row, self.player_col - 1]
        elif action == "d":
            return [self.player_row, self.player_col + 1]
        # Unknown action: stay in place instead of returning None
        return [self.player_row, self.player_col]

    def get_reward(self, row: int, col: int) -> int:
        reward: int = 0
        if self.field[row][col] == free_way:
            reward = self.reward_empty
        elif self.field[row][col] == passed_way:
            reward = self.reward_visited
        elif self.field[row][col] == barrier:
            reward = self.reward_obstacle
        elif self.field[row][col] == exit_ and not self.complete:
            reward = self.reward_incomplete
        if self.complete or self.check_complete():
            new_distance = self.get_new_distance(row, col)
            # distance_to_exit is None until the first move after completion
            if self.distance_to_exit is not None:
                if self.distance_to_exit < new_distance:
                    reward -= self.reward_exit
                elif self.distance_to_exit > new_distance:
                    reward += self.reward_exit
            self.distance_to_exit = new_distance
        return reward

    # Compute the distance from the player to the exit.
    def get_new_distance(self, row: int, col: int) -> float:
        new_distance = sqrt((self.exit_row - row)**2 + (self.exit_col - col)**2)
        return new_distance

    # Check whether the move runs into a wall.
    def is_hit_a_wall(self, row: int, col: int) -> bool:
        if self.field[row][col] == wall:
            self.count_of_point += self.reward_obstacle
            return True
        return False

    # Update the player's position and the field.
    def update_position(self, row: int, col: int) -> None:
        self.field[self.player_row][self.player_col] = passed_way
        self.player_row = row
        self.player_col = col
        self.field[self.player_row][self.player_col] = agent

    # Check whether every free cell has been visited.
    def check_complete(self) -> bool:
        if not self.complete:
            for row in self.field:
                for elem in row:
                    if elem == free_way:
                        return False
            self.complete = True
            self.count_of_point += self.reward_complete
        return True

    # Check the game-over condition.
    def is_game_over(self) -> bool:
        return self.player_row == self.exit_row and self.player_col == self.exit_col

    # Apply an action and return the new state, the reward and the done flag.
    def execute_action(self, action: str) -> list:
        new_row, new_col = self.get_new_position(action)
        if self.is_hit_a_wall(new_row, new_col):
            new_state = self.get_field()
            reward = self.reward_obstacle
            done = False
        else:
            # Compute the reward from the target cell before the agent symbol
            # overwrites it in update_position().
            reward = self.get_reward(new_row, new_col)
            self.update_position(new_row, new_col)
            new_state = self.field
            done = self.is_game_over()

        return [new_state, reward, done]

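
# Minimal manual-play sketch: it drives the class above with w/a/s/d input,
# assuming a plain 10x10 field with a walled border so that the hard-coded
# start cell (1, 4) and exit cell (0, 4) exist.
if __name__ == "__main__":
    demo_field = ([["#"] * 10]
                  + [["#"] + ["."] * 8 + ["#"] for _ in range(8)]
                  + [["#"] * 10])
    game = LabyrinthGame(demo_field)
    game.create_field()
    game.print_field()
    done = False
    while not done:
        move = input("move (w/a/s/d): ").strip()
        if move not in ("w", "a", "s", "d"):
            continue
        _, reward, done = game.execute_action(move)
        game.print_field()
        print("reward:", reward, "score:", game.count_of_point)
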
***main.py***

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from labyrinth import LabyrinthGame

# Define the labyrinth field
field = [["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", "#", "#", "#", "#"],
         ["#", ".", ".", ".", ".", ".", ".", ".", "#", "#"],
         ["#", ".", "#", "#", ".", "#", "#", ".", "#", "#"],
         ["#", ".", ".", ".", ".", "#", "#", ".", "#", "#"],
         ["#", "#", "#", ".", "#", "#", ".", ".", ".", "#"],
         ["#", "#", "#", ".", ".", ".", ".", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", ".", "#", "#", "#"],
         ["#", "#", ".", ".", ".", "#", ".", ".", ".", "#"],
         ["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"]]
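
# The loops below need the field encoded as numbers. The paste refers to an
# external encoder (current_encoded_field from a module named "jk") that is not
# included, so this helper is a minimal sketch of an assumed encoding: each of
# the 10x10 cells becomes a one-hot vector over the six cell types, flattened
# into a single 600-feature row for the dense network.
def encode_field(field_state):
    symbols = [".", "*", "#", "&", "0"]  # free, visited, wall, barrier, exit
    encoded = np.zeros((len(field_state), len(field_state[0]), 6), dtype=np.float32)
    for r, row in enumerate(field_state):
        for c, cell in enumerate(row):
            # The agent cell is wrapped in ANSI colour codes, so any symbol
            # not in the plain list above is treated as the agent (channel 5).
            channel = symbols.index(cell) if cell in symbols else 5
            encoded[r, c, channel] = 1.0
    return encoded.reshape(1, -1)
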

# Define the neural network model
# (the 10x10 field, one-hot encoded over 6 cell types, gives 600 input features)
model = Sequential()
model.add(Dense(128, input_dim=600, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='linear'))

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Create the LabyrinthGame instance and place the agent, exit and obstacles
game = LabyrinthGame(field)
game.create_field()

# The four network outputs are mapped to moves in this fixed order
actions = ["w", "s", "a", "d"]

# Train the neural network to navigate the labyrinth
for _ in range(1000):  # Adjust the number of episodes as needed
    # Note: the game is not reset between episodes; each episode continues
    # from whatever state the previous one left behind.
    state = game.get_field()
    done = False
    while not done:
        # Convert the state to the input data format for the model
        input_data = encode_field(state)

        # Make a prediction using the model
        action_values = model.predict(input_data, verbose=0)[0]

        # Choose the action with the highest predicted value
        action = np.argmax(action_values)

        # Execute the action in the game (map the index to "w"/"s"/"a"/"d")
        new_state, reward, done = game.execute_action(actions[action])

        # Convert the new state to the input data format for the model
        new_input_data = encode_field(new_state)

        # Make a prediction for the new state to calculate the target
        new_action_values = model.predict(new_input_data, verbose=0)[0]

        # Calculate the target Q-value using the Bellman equation
        target = reward + 0.9 * np.max(new_action_values)

        # Update the Q-value for the chosen action
        action_values[action] = target

        # Train the model on the updated input data and target
        model.fit(input_data, np.array([action_values]), epochs=1, verbose=0)

        # Update the state
        state = new_state

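# A purely greedy np.argmax policy can trap the agent in a loop before the
# network has learned anything useful. A common remedy, sketched here only as
# an optional change to the loop above, is epsilon-greedy action selection:
# with a small probability take a random move instead of the best predicted one.
#
#     epsilon = 0.1
#     if np.random.rand() < epsilon:
#         action = np.random.randint(len(actions))   # explore
#     else:
#         action = np.argmax(action_values)          # exploit
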
# After training, you can use the trained model to navigate the labyrinth
state = game.get_field()
done = False
while not done:
    # Convert the state to the input data format for the model
    input_data = encode_field(state)

    # Make a prediction using the model
    action_values = model.predict(input_data, verbose=0)[0]

    # Choose the action with the highest predicted value
    action = np.argmax(action_values)

    # Execute the action in the game
    new_state, reward, done = game.execute_action(actions[action])

    # Update the state
    state = new_state

    # Print the updated field
    game.print_field()
    print()