Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ***Labyrinth.py***
- import random
- from math import sqrt
# Symbols used to draw the game field.
red = "\033[31m"    # ANSI escape: switch to red foreground
reset = "\033[0m"   # ANSI escape: reset terminal attributes
agent = "{}V{}".format(red, reset)  # the player, rendered in red
# agent = "X"
free_way = "."      # a cell that has not been visited yet
passed_way = "*"    # a cell the player has already visited
wall = "#"          # impassable border cell
barrier = "&"       # randomly placed obstacle (passable, but penalized)
exit_ = "0"         # the exit cell


class LabyrinthGame:
    """A labyrinth on a character grid with a reward scheme for training an
    RL agent: visit every free cell, then head for the exit.
    """

    # Reward values.
    reward_empty = 5           # stepping onto a not-yet-visited free cell
    reward_visited = -1        # stepping onto an already visited cell
    reward_obstacle = -10      # stepping into a barrier / bumping a wall
    reward_complete = 1000     # awarded once all free cells are visited
    reward_incomplete = -1000  # reached the exit before visiting everything
    reward_exit = 7            # distance-to-exit shaping bonus/penalty

    # Movement keys, indexable by the 0..3 action index an agent produces.
    actions = ("w", "s", "a", "d")

    def __init__(self, field: list[list[str]]):
        """Store the field and the fixed start/exit coordinates."""
        self.field: list[list[str]] = field
        self.player_row: int = 1
        self.player_col: int = 4
        self.count_of_point: int = 0    # accumulated score
        self.complete: bool = False     # True once every free cell is visited
        self.distance_to_exit: float | None = None  # last measured distance
        self.exit_row = 0
        self.exit_col = 4

    def create_field(self):
        """Place the exit, the player marker and up to 10 random barriers."""
        self.field[self.exit_row][self.exit_col] = exit_
        self.field[self.player_row][self.player_col] = agent
        # 10 attempts; an attempt landing on a non-free cell is skipped,
        # so fewer than 10 barriers may actually be placed.
        for _ in range(10):
            block_row = random.randint(1, 8)
            block_col = random.randint(1, 8)
            if self.field[block_row][block_col] == free_way:
                self.field[block_row][block_col] = barrier

    def print_field(self):
        """Print the field, one row per line, cells separated by spaces."""
        for row in self.field:
            print(" ".join(row))

    def get_field(self):
        """Return the current field (the live list, not a copy)."""
        return self.field

    def set_count_of_point(self, reward: int) -> None:
        """Add *reward* to the accumulated score."""
        self.count_of_point += reward

    def get_new_position(self, action) -> list:
        """Return the [row, col] the player would occupy after *action*.

        *action* is one of 'w'/'s'/'a'/'d', or an integer index 0..3
        (e.g. an argmax over Q-values) mapped through ``self.actions``.

        Raises ValueError for an unknown action; the original silently
        returned None, which crashed the caller on unpacking.
        """
        if not isinstance(action, str):
            # Accept numeric action indices (numpy ints included).
            action = self.actions[int(action)]
        if action == "w":
            return [self.player_row - 1, self.player_col]
        elif action == "s":
            return [self.player_row + 1, self.player_col]
        elif action == "a":
            return [self.player_row, self.player_col - 1]
        elif action == "d":
            return [self.player_row, self.player_col + 1]
        raise ValueError(f"unknown action: {action!r}")

    def get_reward(self, row: int, col: int) -> int:
        """Return the reward for stepping onto cell (row, col).

        Must be called *before* the field is updated, while the target
        cell still holds its original symbol.
        """
        reward: int = 0
        if self.field[row][col] == free_way:
            reward = self.reward_empty
        elif self.field[row][col] == passed_way:
            reward = self.reward_visited
        elif self.field[row][col] == barrier:
            reward = self.reward_obstacle
        elif self.field[row][col] == exit_ and not self.complete:
            reward = self.reward_incomplete
        # Once the field is fully explored, shape the reward by whether
        # the move brings the player closer to the exit.
        if self.complete or self.check_complete():
            new_distance = self.get_new_distance(row, col)
            # distance_to_exit is None on the first shaped step: only
            # record the baseline (comparing None raised TypeError before).
            if self.distance_to_exit is not None:
                if self.distance_to_exit < new_distance:
                    reward -= self.reward_exit
                elif self.distance_to_exit > new_distance:
                    reward += self.reward_exit
            self.distance_to_exit = new_distance
        return reward

    def get_new_distance(self, row: int, col: int) -> float:
        """Return the Euclidean distance from (row, col) to the exit."""
        return sqrt((self.exit_row - row) ** 2 + (self.exit_col - col) ** 2)

    def is_hit_a_wall(self, row: int, col: int) -> bool:
        """Return True (and apply the penalty) if (row, col) is a wall."""
        if self.field[row][col] == wall:
            # reward_obstacle is negative, so += applies the penalty;
            # the original subtracted it, *rewarding* wall hits.
            self.count_of_point += self.reward_obstacle
            return True
        return False

    def update_position(self, row: int, col: int) -> None:
        """Move the player to (row, col), marking the old cell as visited."""
        self.field[self.player_row][self.player_col] = passed_way
        self.player_row = row
        self.player_col = col
        self.field[self.player_row][self.player_col] = agent

    def check_complete(self) -> bool:
        """Return True once every free cell has been visited.

        Awards ``reward_complete`` exactly once, on the transition into
        the complete state.
        """
        if not self.complete:
            for row in self.field:
                for elem in row:
                    if elem == free_way:
                        return False
            self.complete = True
            self.count_of_point += self.reward_complete
        return True

    def is_game_over(self) -> bool:
        """Return True when the player stands on the exit cell."""
        return self.player_row == self.exit_row and self.player_col == self.exit_col

    def execute_action(self, action):
        """Apply *action* and return [new_state, reward, done].

        The reward is computed before the move so it reflects the symbol
        of the target cell. The original updated the field first, so the
        target cell was always the agent marker and the per-cell rewards
        never fired.
        """
        new_row, new_col = self.get_new_position(action)
        if self.is_hit_a_wall(new_row, new_col):
            new_state = self.get_field()
            reward = self.reward_obstacle
            done = False
        else:
            reward = self.get_reward(new_row, new_col)
            self.update_position(new_row, new_col)
            new_state = self.field
            done = self.is_game_over()
        return [new_state, reward, done]
- ***main.py***
- import numpy as np
- import tensorflow as tf
- from tensorflow.python.keras.models import Sequential
- from tensorflow.python.keras.layers import Dense
- from jk import current_encoded_field
- from labyrinth import LabyrinthGame
# Define the labyrinth field: '#' = wall, '.' = free (walkable) cell.
# 10x10 grid; LabyrinthGame hard-codes the exit at (0, 4) and the player
# start at (1, 4).
field = [["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", "#", "#", "#", "#"],
         ["#", ".", ".", ".", ".", ".", ".", ".", "#", "#"],
         ["#", ".", "#", "#", ".", "#", "#", ".", "#", "#"],
         ["#", ".", ".", ".", ".", "#", "#", ".", "#", "#"],
         ["#", "#", "#", ".", "#", "#", ".", ".", ".", "#"],
         ["#", "#", "#", ".", ".", ".", ".", "#", "#", "#"],
         ["#", "#", "#", "#", ".", "#", ".", "#", "#", "#"],
         ["#", "#", ".", ".", ".", "#", ".", ".", ".", "#"],
         ["#", "#", "#", "#", "#", "#", "#", "#", "#", "#"]]
# Define the neural network model: Q-network mapping an encoded state to
# 4 action values (one per movement direction).
# NOTE(review): input_dim=6 must match the encoding produced by jk's
# current_encoded_field — verify against that module.
model = Sequential()
model.add(Dense(128, input_dim=6, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='linear'))

# Compile the model with MSE loss, the standard choice for Q-value regression.
model.compile(loss='mse', optimizer='adam')

# Create the LabyrinthGame instance and initialize the board.
game = LabyrinthGame(field)
# Fix: without create_field() the exit cell is never written onto the grid,
# so is_game_over() can never become True and the rollouts loop forever.
game.create_field()
# Train the neural network to navigate the labyrinth with one-step
# Q-learning (greedy policy, discount 0.9).
for _ in range(1000):  # Adjust the number of episodes as needed
    state = game.get_field()
    done = False
    while not done:
        # NOTE(review): the model input is built from current_encoded_field
        # imported from `jk`, not from `state`, so the network never sees
        # the updated field — verify against the encoder in jk.
        input_data = current_encoded_field[:, :, [0, 1, 2, 3, 4, 5]].reshape(1, -1, 6)
        current_input = current_encoded_field.reshape(1, -1)
        # Predict Q-values for the current state.
        action_values = model.predict(input_data)[0]
        # Fix: map the argmax index 0..3 to the movement key the game
        # expects; passing the raw integer made get_new_position return
        # None and crash on unpacking.
        action_idx = int(np.argmax(action_values))
        action = ("w", "s", "a", "d")[action_idx]
        # Execute the action in the game.
        new_state, reward, done = game.execute_action(action)
        # NOTE(review): new_state is a grid of one-character strings;
        # feeding it to the model presumably requires the same numeric
        # encoding as current_encoded_field — TODO confirm.
        new_input_data = np.array(new_state).reshape(-1, 6)
        new_action_values = model.predict(new_input_data)[0]
        # One-step Bellman target for the chosen action.
        target = reward + 0.9 * np.max(new_action_values)
        action_values[action_idx] = target
        # Reshape the input data and target for model training.
        input_data = input_data.reshape(1, -1, 6)
        target = np.array([action_values])
        # Train the model on the updated input data and target.
        model.fit(input_data, target, epochs=1, verbose=0)
        # Update the state.
        state = new_state
# After training, run a greedy rollout with the trained model and print
# the field after every step.
state = game.get_field()
done = False
while not done:
    # NOTE(review): state holds one-character strings; the model
    # presumably needs the numeric encoding used during training —
    # verify against the encoder in jk.
    input_data = np.array(state).reshape(-1, 6)
    # Predict Q-values and pick the greedy action.
    action_values = model.predict(input_data)[0]
    # Fix: map the argmax index 0..3 to the movement key the game
    # expects; passing the raw integer crashed get_new_position's caller.
    action = ("w", "s", "a", "d")[int(np.argmax(action_values))]
    # Execute the action in the game.
    new_state, reward, done = game.execute_action(action)
    # Update the state.
    state = new_state
    # Print the updated field.
    game.print_field()
    print()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement