Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ***Labyrinth.py***
- import random
- from math import sqrt
# Symbols used to draw the game field.
red = "\033[31m"    # ANSI escape: switch to red foreground
reset = "\033[0m"   # ANSI escape: reset terminal attributes
agent = "{}V{}".format(red, reset)  # the player, rendered in red
# agent = "X"
free_way = "."      # a cell that has not been visited yet
passed_way = "*"    # a cell the player has already visited
wall = "#"          # impassable border cell
barrier = "&"       # randomly placed obstacle (passable, but penalized)
exit_ = "0"         # the exit cell


class LabyrinthGame:
    """A labyrinth on a character grid with a reward scheme for training an
    RL agent: visit every free cell, then head for the exit.
    """

    # Reward values.
    reward_empty = 5           # stepping onto a not-yet-visited free cell
    reward_visited = -1        # stepping onto an already visited cell
    reward_obstacle = -10      # stepping into a barrier / bumping a wall
    reward_complete = 1000     # awarded once all free cells are visited
    reward_incomplete = -1000  # reached the exit before visiting everything
    reward_exit = 7            # distance-to-exit shaping bonus/penalty

    # Movement keys, indexable by the 0..3 action index an agent produces.
    actions = ("w", "s", "a", "d")

    def __init__(self, field: list[list[str]]):
        """Store the field and the fixed start/exit coordinates."""
        self.field: list[list[str]] = field
        self.player_row: int = 1
        self.player_col: int = 4
        self.count_of_point: int = 0    # accumulated score
        self.complete: bool = False     # True once every free cell is visited
        self.distance_to_exit: float | None = None  # last measured distance
        self.exit_row = 0
        self.exit_col = 4

    def create_field(self):
        """Place the exit, the player marker and up to 10 random barriers."""
        self.field[self.exit_row][self.exit_col] = exit_
        self.field[self.player_row][self.player_col] = agent
        # 10 attempts; an attempt landing on a non-free cell is skipped,
        # so fewer than 10 barriers may actually be placed.
        for _ in range(10):
            block_row = random.randint(1, 8)
            block_col = random.randint(1, 8)
            if self.field[block_row][block_col] == free_way:
                self.field[block_row][block_col] = barrier

    def print_field(self):
        """Print the field, one row per line, cells separated by spaces."""
        for row in self.field:
            print(" ".join(row))

    def get_field(self):
        """Return the current field (the live list, not a copy)."""
        return self.field

    def set_count_of_point(self, reward: int) -> None:
        """Add *reward* to the accumulated score."""
        self.count_of_point += reward

    def get_new_position(self, action) -> list:
        """Return the [row, col] the player would occupy after *action*.

        *action* is one of 'w'/'s'/'a'/'d', or an integer index 0..3
        (e.g. an argmax over Q-values) mapped through ``self.actions``.

        Raises ValueError for an unknown action; the original silently
        returned None, which crashed the caller on unpacking.
        """
        if not isinstance(action, str):
            # Accept numeric action indices (numpy ints included).
            action = self.actions[int(action)]
        if action == "w":
            return [self.player_row - 1, self.player_col]
        elif action == "s":
            return [self.player_row + 1, self.player_col]
        elif action == "a":
            return [self.player_row, self.player_col - 1]
        elif action == "d":
            return [self.player_row, self.player_col + 1]
        raise ValueError(f"unknown action: {action!r}")

    def get_reward(self, row: int, col: int) -> int:
        """Return the reward for stepping onto cell (row, col).

        Must be called *before* the field is updated, while the target
        cell still holds its original symbol.
        """
        reward: int = 0
        if self.field[row][col] == free_way:
            reward = self.reward_empty
        elif self.field[row][col] == passed_way:
            reward = self.reward_visited
        elif self.field[row][col] == barrier:
            reward = self.reward_obstacle
        elif self.field[row][col] == exit_ and not self.complete:
            reward = self.reward_incomplete
        # Once the field is fully explored, shape the reward by whether
        # the move brings the player closer to the exit.
        if self.complete or self.check_complete():
            new_distance = self.get_new_distance(row, col)
            # distance_to_exit is None on the first shaped step: only
            # record the baseline (comparing None raised TypeError before).
            if self.distance_to_exit is not None:
                if self.distance_to_exit < new_distance:
                    reward -= self.reward_exit
                elif self.distance_to_exit > new_distance:
                    reward += self.reward_exit
            self.distance_to_exit = new_distance
        return reward

    def get_new_distance(self, row: int, col: int) -> float:
        """Return the Euclidean distance from (row, col) to the exit."""
        return sqrt((self.exit_row - row) ** 2 + (self.exit_col - col) ** 2)

    def is_hit_a_wall(self, row: int, col: int) -> bool:
        """Return True (and apply the penalty) if (row, col) is a wall."""
        if self.field[row][col] == wall:
            # reward_obstacle is negative, so += applies the penalty;
            # the original subtracted it, *rewarding* wall hits.
            self.count_of_point += self.reward_obstacle
            return True
        return False

    def update_position(self, row: int, col: int) -> None:
        """Move the player to (row, col), marking the old cell as visited."""
        self.field[self.player_row][self.player_col] = passed_way
        self.player_row = row
        self.player_col = col
        self.field[self.player_row][self.player_col] = agent

    def check_complete(self) -> bool:
        """Return True once every free cell has been visited.

        Awards ``reward_complete`` exactly once, on the transition into
        the complete state.
        """
        if not self.complete:
            for row in self.field:
                for elem in row:
                    if elem == free_way:
                        return False
            self.complete = True
            self.count_of_point += self.reward_complete
        return True

    def is_game_over(self) -> bool:
        """Return True when the player stands on the exit cell."""
        return self.player_row == self.exit_row and self.player_col == self.exit_col

    def execute_action(self, action):
        """Apply *action* and return [new_state, reward, done].

        The reward is computed before the move so it reflects the symbol
        of the target cell. The original updated the field first, so the
        target cell was always the agent marker and the per-cell rewards
        never fired.
        """
        new_row, new_col = self.get_new_position(action)
        if self.is_hit_a_wall(new_row, new_col):
            new_state = self.get_field()
            reward = self.reward_obstacle
            done = False
        else:
            reward = self.get_reward(new_row, new_col)
            self.update_position(new_row, new_col)
            new_state = self.field
            done = self.is_game_over()
        return [new_state, reward, done]
- ***main.py***
- import numpy as np
- import tensorflow as tf
- from tensorflow.python.keras.models import Sequential
- from tensorflow.python.keras.layers import Dense
- from jk import current_encoded_field
- from labyrinth import LabyrinthGame
# Define the labyrinth field: '#' = wall, '.' = free (walkable) cell.
# 10x10 grid; LabyrinthGame hard-codes the exit at (0, 4) and the player
# start at (1, 4).
field = [["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", "#", "#", "#", "#"],
         ["#", ".", ".", ".", ".", ".", ".", ".", "#", "#"],
         ["#", ".", "#", "#", ".", "#", "#", ".", "#", "#"],
         ["#", ".", ".", ".", ".", "#", "#", ".", "#", "#"],
         ["#", "#", "#", ".", "#", "#", ".", ".", ".", "#"],
         ["#", "#", "#", ".", ".", ".", ".", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", ".", "#", "#", "#"],
         ["#", "#", ".", ".", ".", "#", ".", ".", ".", "#"],
         ["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"]]
# Define the neural network model: Q-network mapping an encoded state to
# 4 action values (one per movement direction).
# NOTE(review): input_dim=6 must match the encoding produced by jk's
# current_encoded_field — verify against that module.
model = Sequential()
model.add(Dense(128, input_dim=6, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='linear'))

# Compile the model with MSE loss, the standard choice for Q-value regression.
model.compile(loss='mse', optimizer='adam')

# Create the LabyrinthGame instance and initialize the board.
game = LabyrinthGame(field)
# Fix: without create_field() the exit cell is never written onto the grid,
# so is_game_over() can never become True and the rollouts loop forever.
game.create_field()
# Train the neural network to navigate the labyrinth with one-step
# Q-learning (greedy policy, discount 0.9).
for _ in range(1000):  # Adjust the number of episodes as needed
    state = game.get_field()
    done = False
    while not done:
        # NOTE(review): the model input is built from current_encoded_field
        # imported from `jk`, not from `state`, so the network never sees
        # the updated field — verify against the encoder in jk.
        input_data = current_encoded_field[:, :, [0, 1, 2, 3, 4, 5]].reshape(1, -1, 6)
        current_input = current_encoded_field.reshape(1, -1)
        # Predict Q-values for the current state.
        action_values = model.predict(input_data)[0]
        # Fix: map the argmax index 0..3 to the movement key the game
        # expects; passing the raw integer made get_new_position return
        # None and crash on unpacking.
        action_idx = int(np.argmax(action_values))
        action = ("w", "s", "a", "d")[action_idx]
        # Execute the action in the game.
        new_state, reward, done = game.execute_action(action)
        # NOTE(review): new_state is a grid of one-character strings;
        # feeding it to the model presumably requires the same numeric
        # encoding as current_encoded_field — TODO confirm.
        new_input_data = np.array(new_state).reshape(-1, 6)
        new_action_values = model.predict(new_input_data)[0]
        # One-step Bellman target for the chosen action.
        target = reward + 0.9 * np.max(new_action_values)
        action_values[action_idx] = target
        # Reshape the input data and target for model training.
        input_data = input_data.reshape(1, -1, 6)
        target = np.array([action_values])
        # Train the model on the updated input data and target.
        model.fit(input_data, target, epochs=1, verbose=0)
        # Update the state.
        state = new_state
# After training, run a greedy rollout with the trained model and print
# the field after every step.
state = game.get_field()
done = False
while not done:
    # NOTE(review): state holds one-character strings; the model
    # presumably needs the numeric encoding used during training —
    # verify against the encoder in jk.
    input_data = np.array(state).reshape(-1, 6)
    # Predict Q-values and pick the greedy action.
    action_values = model.predict(input_data)[0]
    # Fix: map the argmax index 0..3 to the movement key the game
    # expects; passing the raw integer crashed get_new_position's caller.
    action = ("w", "s", "a", "d")[int(np.argmax(action_values))]
    # Execute the action in the game.
    new_state, reward, done = game.execute_action(action)
    # Update the state.
    state = new_state
    # Print the updated field.
    game.print_field()
    print()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement