# neurodrone (GroX24, Apr 2024)
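# Overview: a 2D drone simulation in pygame with three training approaches that
# share the same physics (simstep) and renderer:
#   * Agent / DoubleQAgent: (double) deep Q-learning with an epsilon-greedy
#     policy and a replay buffer (used by main()),
#   * gmain(): neuroevolution of the thrust controller via pygad.torchga,
#   * threadmain(): a hand-rolled genetic algorithm with joblib-parallel
#     fitness evaluation (see next_generation()).
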
import pygame as pg
from math import sin, cos, pi, ceil, floor

import torch as T
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from numpy.random import random as nprand
import matplotlib.pyplot as plt
import os
import csv
import pygad
import pygad.torchga
import joblib

# import time

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

WIDTH, HEIGHT = 1000, 600
m = 1  # drone mass
g = 4  # grav. acceleration
dt = 4 / 60  # simulation time step
l = 1  # length of the base
eng_l = 0.25  # length of each engine (there is one on the left and one on the right)
d = 0.25  # height of both the base and the engines
drag = 0.1  # drag coefficient
maxthr = 4  # max engine thrust
thr_incr = maxthr * dt / 1  # thrust change per step from key presses (0 to max in 1 s)
I = m * (l + 2 * eng_l) ** 2 / 12  # moment of inertia for a thin rod
fontsize = 18
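
# Note: with dt = 4 / 60 and the render loop capped at 60 FPS (clock.tick(60)
# below), the simulation advances roughly 4 seconds of sim time per wall second.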


# image = pg.image.load("undrtale.png")


if __name__ == "__main__":
    pg.init()
    font = pg.font.SysFont("arial", fontsize)


class NewrNet(nn.Module):
    """A simple fully connected Q-network: ReLU hidden layers, linear output."""

    def __init__(self, n_state, n_actions, n_layers, n_neurons, lr=0.001, use_cuda=True):
        super().__init__()
        self.layers = nn.ModuleList()
        self.len = n_layers
        self.n_state = n_state
        self.n_actions = n_actions
        if n_layers == 1:
            self.layers.append(nn.Linear(n_state, n_actions))
        else:
            self.layers.append(nn.Linear(n_state, n_neurons))
            for i in range(n_layers - 2):
                self.layers.append(nn.Linear(n_neurons, n_neurons))
            self.layers.append(nn.Linear(n_neurons, n_actions))
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.loss = nn.MSELoss()
        self.device = T.device('cuda' if T.cuda.is_available() and use_cuda else 'cpu')
        self.to(self.device)
        print(f"using {self.device}")

    def forward(self, x):
        # start = time.time_ns() / 1e6
        for i in range(self.len - 1):
            x = F.relu(self.layers[i](x))
        # end = time.time_ns() / 1e6
        # print(f"QNet forward time: {end - start} ms")
        return self.layers[-1](x)
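
# For reference: main() below builds DoubleQAgent(..., n_state=10, n_actions=5,
# n_layers=3, n_neurons=64, ...), so each of its two nets is 10 -> 64 -> 64 -> 5
# with ReLU between layers and raw Q-values at the output.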


class MDPMemory:
    def __init__(self, max_mem, n_state):
        self.mem_size = max_mem
        self.n_state = n_state
        self.state_memory = np.zeros((self.mem_size, n_state), dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, n_state), dtype=np.float32)
        self.action_memory = np.zeros(self.mem_size, dtype=np.int32)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool_)
        self.mem_countr = 0

    def store(self, state, action, reward, new_state, done):
        i = self.mem_countr % self.mem_size  # ring buffer: wrap around and overwrite the oldest
        self.state_memory[i] = state
        self.action_memory[i] = action
        self.reward_memory[i] = reward
        self.new_state_memory[i] = new_state
        self.terminal_memory[i] = done
        self.mem_countr += 1

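# Note: Agent below keeps an equivalent replay buffer inline instead of reusing
# MDPMemory; only DoubleQAgent uses this class (one buffer per network).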

class Agent:
    def __init__(self, gamma, eps, lr, n_state, n_actions, batch_size, max_mem=100000,
                 eps_end=0.01, eps_dec=5e-4, n_layers=3, n_neurons=128, use_cuda=True):
        self.gamma = gamma
        self.eps = eps
        self.eps_min = eps_end
        self.eps_dec = eps_dec
        self.action_space = list(range(n_actions))
        self.n_state = n_state
        self.n_actions = n_actions
        self.n_layers = n_layers
        self.n_neurons = n_neurons
        self.lr = lr
        self.batch_size = batch_size
        self.mem_size = max_mem
        self.mem_countr = 0

        self.eval = NewrNet(n_state, n_actions, n_layers, n_neurons, lr, use_cuda=use_cuda)

        self.state_memory = np.zeros((self.mem_size, n_state), dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, n_state), dtype=np.float32)
        self.action_memory = np.zeros(self.mem_size, dtype=np.int32)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool_)

    def store_transition(self, state, action, reward, new_state, done):
        i = self.mem_countr % self.mem_size
        self.state_memory[i] = state
        self.action_memory[i] = action
        self.reward_memory[i] = reward
        self.new_state_memory[i] = new_state
        self.terminal_memory[i] = done
        self.mem_countr += 1

    def policy(self, state):
        # epsilon-greedy: explore with probability eps, otherwise act greedily
        if np.random.random() < self.eps:
            action = np.random.choice(self.action_space)
        else:
            state = T.tensor([state]).to(self.eval.device)
            actions = self.eval.forward(state)
            action = T.argmax(actions).item()
        return action

    def learn(self):
        if self.mem_countr < self.batch_size:
            return
        self.eval.optimizer.zero_grad()
        mem_len = min(self.mem_size, self.mem_countr)
        batch = np.random.choice(mem_len, self.batch_size, replace=False)
        batch_index = np.arange(self.batch_size, dtype=np.int32)

        state_batch = T.from_numpy(self.state_memory[batch]).to(self.eval.device)
        new_state_batch = T.from_numpy(self.new_state_memory[batch]).to(self.eval.device)
        reward_batch = T.from_numpy(self.reward_memory[batch]).to(self.eval.device)
        terminal_batch = T.from_numpy(self.terminal_memory[batch]).to(self.eval.device)
        action_batch = self.action_memory[batch]  # a numpy array works for the fancy indexing below

        q_eval = self.eval.forward(state_batch)[batch_index, action_batch]
        nq_eval = self.eval.forward(new_state_batch)
        nq_eval[terminal_batch] = 0.0  # terminal states have no future value

        q_target = reward_batch + self.gamma * T.max(nq_eval, dim=1)[0]
        loss = self.eval.loss(q_target, q_eval).to(self.eval.device)
        loss.backward()
        self.eval.optimizer.step()

        self.eps = max(self.eps_min, self.eps - self.eps_dec)

    def save(self, name, special=""):
        if special != "":
            special = "_" + special
        T.save(self.eval.state_dict(), f"{name}/net1{special}.pt")

    def load(self, name, special=""):
        if special != "":
            special = "_" + special
        self.eval.load_state_dict(T.load(f"{name}/net1{special}.pt"))


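# Agent.learn above implements the standard one-step Q-learning update: the
# TD target is y = r + gamma * max_a' Q(s', a') (with Q(s', .) zeroed on
# terminal states), and Q(s, a) is regressed toward y with MSE over a
# uniformly sampled minibatch.
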
class DoubleQAgent:
    def __init__(self, gamma, eps, lr, n_state, n_actions, batch_size, max_mem=100000,
                 eps_end=0.01, eps_dec=5e-4, n_layers=3, n_neurons=64, use_cuda=True):
        self.gamma = gamma
        self.eps = eps
        self.eps_min = eps_end
        self.eps_dec = eps_dec
        self.action_space = list(range(n_actions))
        self.n_state = n_state
        self.n_actions = n_actions
        self.n_layers = n_layers
        self.n_neurons = n_neurons
        self.lr = lr
        self.batch_size = batch_size
        self.memchoice = None
        self.set_eval()
        self.mem_countr = 0
        self.mem_size = max_mem

        self.eval = [NewrNet(n_state, n_actions, n_layers, n_neurons, lr, use_cuda=use_cuda) for _ in range(2)]

        self.mem = [MDPMemory(max_mem, n_state) for _ in range(2)]

    def set_eval(self):
        # flip a coin to decide which of the two networks acts and learns next
        self.memchoice = 1 if nprand() < 0.5 else 0

    def store_transition(self, state, action, reward, new_state, done):
        self.mem[self.memchoice].store(state, action, reward, new_state, done)
        self.mem_countr += 1

    def policy(self, state):
        if nprand() < self.eps:
            action = np.random.choice(self.action_space)
        else:
            state = T.tensor([state]).to(self.eval[self.memchoice].device)
            actions = self.eval[self.memchoice].forward(state)
            action = T.argmax(actions).item()
        return action

    def learn(self):
        # start = time.time_ns() / 1e6
        net = self.eval[self.memchoice]
        memory = self.mem[self.memchoice]
        if memory.mem_countr < self.batch_size:
            return
        net.optimizer.zero_grad()
        mem_len = min(memory.mem_size, memory.mem_countr)
        batch = np.random.choice(mem_len, self.batch_size, replace=False)
        batch_index = np.arange(self.batch_size, dtype=np.int32)

        state_batch = T.from_numpy(memory.state_memory[batch]).to(net.device)
        new_state_batch = T.from_numpy(memory.new_state_memory[batch]).to(net.device)
        reward_batch = T.from_numpy(memory.reward_memory[batch]).to(net.device)
        terminal_batch = T.from_numpy(memory.terminal_memory[batch]).to(net.device)
        action_batch = memory.action_memory[batch]  # a numpy array works for the fancy indexing below

        q_eval = net.forward(state_batch)[batch_index, action_batch]
        other = 1 - self.memchoice  # the other network evaluates the next states
        nq_eval = self.eval[other].forward(new_state_batch)
        nq_eval[terminal_batch] = 0.0

        q_target = reward_batch + self.gamma * T.max(nq_eval, dim=1)[0]
        loss = net.loss(q_target, q_eval).to(net.device)
        loss.backward()
        net.optimizer.step()

        self.eps = max(self.eps_min, self.eps - self.eps_dec)
        self.set_eval()
        # end = time.time_ns() / 1e6
        # print(f"Agent learn time: {end - start} ms")

    def save(self, name, special=""):
        if special != "":
            special = "_" + special
        T.save(self.eval[0].state_dict(), f"{name}/net1{special}.pt")
        T.save(self.eval[1].state_dict(), f"{name}/net2{special}.pt")

    def load(self, name, special=""):
        if special != "":
            special = "_" + special
        self.eval[0].load_state_dict(T.load(f"{name}/net1{special}.pt"))
        self.eval[1].load_state_dict(T.load(f"{name}/net2{special}.pt"))

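# Double Q-learning notes: each step a coin flip (set_eval) picks which of the
# two networks acts, stores transitions, and gets updated, while the other
# network scores the next states, which tames the maximization bias of plain
# Q-learning. This variant bootstraps from max_a' Q_other(s', a') directly
# rather than evaluating the argmax of the updated network under the other one,
# and each network keeps its own replay buffer.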

def reward_f(state):
    global l, eng_l, d
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thrust, right_thrust, _) = state
    # height above ground, and position/velocity relative to the cursor target
    h, x, y = y, x - xc, y - yc
    vx -= vxc
    vy -= vyc
    collision_punish = 100
    R = 5
    r = (x ** 2 + y ** 2) ** 0.5
    vr = (vx ** 2 + vy ** 2) ** 0.5
    precision = 10 / (vr + 1 / 10) if r < 0.5 else 0  # bonus for hovering near the target
    vangle_punish = abs(vangle) / 5
    if r == 0:
        toward_reward = precision
    else:
        toward_reward = -(vx * x + vy * y) / r  # speed of approach toward the target
    if r > R:
        r = R
    done = h < d + l / 2 + eng_l or abs(x) > 20 or abs(y) > 20
    reward = (((1 - r / R) * 10 + 1 + precision + toward_reward - vangle_punish) * 0.01 -
              collision_punish * int(done))
    return reward, done
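
# Reward shaping summary: closeness to the cursor + a constant living bonus
# + a hover-precision bonus + approach speed - a spin penalty, all scaled by
# 0.01, with a large one-off penalty when the drone crashes or flies away.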


def fitness_per_frame(state):
    global l, eng_l, d, maxthr
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thrust, right_thrust, _) = state
    h, x, y = y, x - xc, y - yc
    vx -= vxc
    vy -= vyc
    collision_punish = 100
    R = .125
    r = (x ** 2 + y ** 2) ** 0.5
    vr = (vx ** 2 + vy ** 2) ** 0.5
    consumption = min(left_thrust + right_thrust, 4)  # fuel-use penalty term, capped
    done = h < d + l / 2 + eng_l or abs(x) > 20 or abs(y) > 20
    reward = ((1 + (R * 20) / (r + R) / (vr + 0.5) / (consumption + 1) - abs(vangle) / 10) * dt
              - collision_punish * int(done))
    return reward, done


def fitness(ga_instance=None, solution=None, sol_idx=None):
    # roll out one episode with the given chromosome and accumulate per-frame fitness
    global model, device
    total_fitness, state = simstep([True])
    for step in range(1000):
        observation = T.tensor([get_observation3(state)]).to(device)
        action = T.flatten(pygad.torchga.predict(model=model, solution=solution, data=observation)).tolist()
        fpf, state = simstep(state, action=action, reward=fitness_per_frame)
        total_fitness += fpf
        if state[-1]:
            break
    return total_fitness
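
# Note on the signature: recent pygad versions call fitness_func with
# (ga_instance, solution, solution_idx); the keyword defaults also let
# eval_genomes_thread below call fitness(solution=chromosome) directly.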


gen = 0
renderme = False


def on_generation(ga_instance):
    global gen, renderme
    gen += 1
    print(f"Generation = {ga_instance.generations_completed}")
    if not gen % 10:
        solution, best_fitness, idx = ga_instance.best_solution()
        print(f"Fitness    = {best_fitness}")
        quickrender(model, min(10, ga_instance.generations_completed // 5), pyga=True, solution=solution)
        temp = nn.Sequential(nn.Linear(n_in, n_neur),
                             nn.ReLU(),
                             nn.Linear(n_neur, n_neur),
                             nn.ReLU(),
                             nn.Linear(n_neur, n_out))
        temp.load_state_dict(pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution))
        T.save(temp.state_dict(), f"{folder}/model_{gen}.pt")


def weights_init_uniform_rule(module):
    classname = module.__class__.__name__
    # for every Linear layer in a model
    if classname.find('Linear') != -1:
        # scale the uniform init by the number of inputs
        n = module.in_features
        y = 1.0 / np.sqrt(n)
        module.weight.data.uniform_(-y, y)
        module.bias.data.fill_(0)


folder = "mltthr alg 1"
n_in = 8
n_neur = 48
n_out = 2
model = nn.Sequential(nn.Linear(n_in, n_neur),
                      nn.ReLU(),
                      nn.Linear(n_neur, n_neur),
                      nn.ReLU(),
                      nn.Linear(n_neur, n_out))
# model.apply(weights_init_uniform_rule)
# device = T.device('cuda' if T.cuda.is_available() else 'cpu')
device = T.device('cpu')
model.to(device)

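# This module-level model serves as the genome template for both GA paths:
# pygad.torchga flattens its parameters into one weight vector per solution,
# and model_weights_as_dict() maps a vector back onto the same architecture.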

def quickrender(model, n_ep, pyga=False, solution=None):
    font = pg.font.SysFont("arial", fontsize)
    global device
    fitness, state = simstep([True])
    scale = 100
    clock = pg.time.Clock()
    do_render = True
    screen = pg.display.set_mode((WIDTH, HEIGHT))
    pg.display.set_caption('Drone thingy')
    for i in range(n_ep):
        fitness = 0
        for step in range(2000):
            for event in pg.event.get():
                if event.type == pg.QUIT:
                    return
                if event.type == pg.KEYDOWN:
                    if event.key == pg.K_r:
                        do_render = True
                    elif event.key == pg.K_SPACE:
                        do_render = False
            if do_render:
                cam = (WIDTH, HEIGHT, scale, state[0], max(2, state[1]))
                screen = render(state, fitness, screen, cam, scale, WIDTH, HEIGHT, i, font)
                pg.display.flip()
                clock.tick(60)
            observation = T.tensor([get_observation3(state)]).to(device)
            if pyga:
                action = T.flatten(pygad.torchga.predict(model=model, solution=solution, data=observation)).tolist()
            else:
                action = T.flatten(model(observation.clone().detach())).tolist()
            fpf, state = simstep(state, action=action, reward=fitness_per_frame)
            fitness += fpf
            if state[-1]:
                break
        # state[-1] = True


def gmain():
    '''
    observation3: [x', y', h, sin, cos, vx, vy, vangle] - 8 (primes: relative to the cursor)
    actions3 = (left_thr, right_thr) - 2
    '''
    preload = False
    if os.path.exists(folder) and not preload:
        print("Folder already exists, choose another one")
        return
    elif preload and os.path.exists(f"{folder}/model_50.pt"):
        model.load_state_dict(T.load(f"{folder}/model_50.pt"))
    else:
        os.mkdir(folder)
    torch_ga = pygad.torchga.TorchGA(model=model,
                                     num_solutions=100)
    num_generations = 2500  # number of generations
    num_parents_mating = 30  # number of solutions to be selected as parents in the mating pool
    initial_population = torch_ga.population_weights  # initial population of network weights

    ga_instance = pygad.GA(num_generations=num_generations,
                           num_parents_mating=num_parents_mating,
                           initial_population=initial_population,
                           fitness_func=fitness,
                           on_generation=on_generation,
                           mutation_type="adaptive",
                           mutation_probability=(0.01, 0.0),  # adaptive: (low-quality rate, high-quality rate)
                           random_mutation_min_val=-0.5,
                           random_mutation_max_val=0.5)

    ga_instance.run()

    # After the generations complete, show a plot summarizing how fitness evolved over the generations.
    ga_instance.plot_fitness(title="PyGAD & PyTorch - Iteration vs. Fitness", linewidth=4)
    solution, solution_fitness, solution_idx = ga_instance.best_solution()
    print(f"Fitness value of the best solution = {solution_fitness:.2f}")
    print(f"Index of the best solution: {solution_idx}")
    bestmodel = pygad.torchga.model_weights_as_dict(model=model, weights_vector=solution)
    model.load_state_dict(bestmodel)
    model.to(device)
    T.save(model.state_dict(), f"{folder}/model.pt")

    quickrender(model, 1000)


def eval_genomes_thread(chromosome):
    return fitness(solution=chromosome)


def next_generation(chromosomes, fitnesses, ratio_selected=0.5, ratio_mutated=0.5, mutation_prob=0.05,
                    mutation_range=0.5, cross_prob=0.5):
    pop_size = np.shape(chromosomes)[0]
    chrom_size = np.shape(chromosomes)[1]
    fitness_total = np.sum(fitnesses)

    sorting = np.argsort(-fitnesses)
    fitnesses = fitnesses[sorting]  # fitnesses now in descending order
    sorted_chromosomes = chromosomes[sorting]
    n_selected = int(pop_size * ratio_selected)
    new_chromosomes = np.zeros(shape=(pop_size, chrom_size), dtype=np.float32)
    new_chromosomes[:n_selected] = sorted_chromosomes[:n_selected]  # elitism: keep the best unchanged
    cross = np.random.randint(0, chrom_size, size=(pop_size - n_selected))
    crossed = np.random.randint(0, n_selected, size=(pop_size - n_selected, 2))
    for i in range(n_selected, pop_size):
        # crossbreed: single-point crossover between two of the selected (sorted) parents
        if nprand() < cross_prob:
            new_chromosomes[i][:cross[i - n_selected]] = sorted_chromosomes[crossed[i - n_selected][0]][
                                                         :cross[i - n_selected]]
            new_chromosomes[i][cross[i - n_selected]:] = sorted_chromosomes[crossed[i - n_selected][1]][
                                                         cross[i - n_selected]:]
        else:
            new_chromosomes[i] = sorted_chromosomes[crossed[i - n_selected][0]]
        # mutate: a fraction of individuals gets uniform noise on a fraction of their genes
        if nprand() < ratio_mutated:
            for j in range(chrom_size):
                if nprand() < mutation_prob:
                    new_chromosomes[i][j] += 2 * (nprand() - 0.5) * mutation_range
    return new_chromosomes, fitnesses, fitness_total
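
# The loop above is a plain generational GA: sort by fitness, copy the top
# ratio_selected as elites, then fill the rest with single-point crossover
# between random elites (parent indices refer to the sorted array) and
# per-gene uniform mutation.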


def threadmain():
    pg.init()
    preload = True
    if os.path.exists(folder) and not preload:
        print("Folder already exists, choose another one")
        return
    elif preload and os.path.exists(f"{folder}/model_2525.pt"):
        model.load_state_dict(T.load(f"{folder}/model_2525.pt"))
    else:
        os.mkdir(folder)
    chromosomes = np.array(pygad.torchga.TorchGA(model=model,
                                                 num_solutions=100).population_weights, dtype=np.float32)
    # print(chromosomes)
    num_generations = 10000
    # fitnesses = np.zeros(pop_size, dtype=np.float32)

    for i in range(2526, num_generations):  # resume counting from the preloaded checkpoint
        # evaluate every chromosome in parallel across all CPU cores
        fitnesses = np.array(joblib.Parallel(n_jobs=-1)(joblib.delayed(eval_genomes_thread)(c) for c in chromosomes))
        # selection, mutation, and crossover produce the next generation
        chromosomes, fitnesses, total = next_generation(chromosomes, fitnesses,
                                                        ratio_selected=0.3, ratio_mutated=0.3,
                                                        cross_prob=0.1, mutation_prob=0.05, mutation_range=0.1)

        print(
            f"Gen {i}/{num_generations}:\tbest:{fitnesses[0]:.1f};\t"
            f"second:{fitnesses[1]:.1f}\tthird:{fitnesses[2]:.1f};\ttotal:{total:.0f}".expandtabs(16))
        # fitnesses = np.zeros(pop_size, dtype=np.float32)  # Wipe fitnesses
        if not i % 101:
            quickrender(model, min(10, i // 5), pyga=True, solution=chromosomes[0])
            temp = nn.Sequential(nn.Linear(n_in, n_neur),
                                 nn.ReLU(),
                                 nn.Linear(n_neur, n_neur),
                                 nn.ReLU(),
                                 nn.Linear(n_neur, n_out))
            bestmodel = pygad.torchga.model_weights_as_dict(model=temp, weights_vector=chromosomes[0])
            model.load_state_dict(bestmodel)
            model.to(device)
            T.save(model.state_dict(), f"{folder}/model_{i}.pt")


def reward_stay(state):
    global l, eng_l, d, dt
    collision_punish = 100
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thrust, right_thrust, _) = state
    h, x, y = y, x - xc, y - yc
    vr = (vx ** 2 + vy ** 2) ** 0.5
    done = h < d + l / 2 + eng_l or abs(x) > 20 or abs(y) > 20
    vangle_punish = abs(vangle) / 4
    # reward simply for staying slow and upright; big penalty for crashing
    return (1 / (vr + 1 / 20) - vangle_punish) * 0.01 - collision_punish * int(done), done


def simstep(state, playable=False, action=None, reward=reward_f):
    # start = time.time_ns()
    global dt, m, g, l, eng_l, d, drag, maxthr, thr_incr, I
    if state[-1]:
        # episode over: respawn with a randomized state
        # [x, y, xc, yc, angle, vx, vy, vxc, vyc, vangle, left_thrust, right_thrust, done]
        state = [(2 * nprand() - 1) * 10, 3 + nprand() * 17,  # x y
                 (2 * nprand() - 1) * 10, 2 + nprand() * 18,  # xc yc
                 pi * (2 * nprand() - 1) * 0.1,  # angle
                 (2 * nprand() - 1) * 1, (1.5 * nprand()) * 1,  # vx, vy
                 0, 0,  # vxc, vyc (have to be initialised even with no actual info)
                 pi * (2 * nprand() - 1) * 0.1,  # vangle
                 maxthr * nprand() * 0, maxthr * nprand() * 0, False]  # thrust, done
        return 0, state
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thrust, right_thrust, done) = state

    '''# cursor
    prevx = xc
    prevy = yc

    # some code for moving
    vxc = (xc - prevx) / dt
    vyc = (yc - prevy) / dt'''

    # forces
    fx = -drag * vx - (left_thrust + right_thrust) * sin(angle)
    fy = -m * g - drag * vy + (left_thrust + right_thrust) * cos(angle)
    torque = (right_thrust - left_thrust) * (l + eng_l) / 2 - drag * vangle * 4

    # velocities
    vx += (fx / m) * dt
    vy += (fy / m) * dt
    vangle += (torque / I) * dt

    # position and angle
    x += vx * dt
    y += vy * dt
    angle += vangle * dt
    if angle < -pi:
        angle += 2 * pi
    elif angle > pi:
        angle -= 2 * pi

    # Engine control
    if playable:
        # Adjust engine thrusts based on key presses
        if pg.key.get_pressed()[pg.K_LEFT]:
            left_thrust += thr_incr
        else:
            left_thrust -= 2 * thr_incr
        if pg.key.get_pressed()[pg.K_RIGHT]:
            right_thrust += thr_incr
        else:
            right_thrust -= 2 * thr_incr
    elif isinstance(action, (list, tuple)):
        # continuous control (the GA-evolved nets): the two outputs set the thrusts directly
        left_thrust = action[0] * maxthr / 10
        right_thrust = action[1] * maxthr / 10
    else:
        '''an older 7-action scheme ("actions" in main()'s docstring):
        if action in (1, 5):
            left_thrust -= thr_incr
        if action in (2, 5):
            right_thrust -= thr_incr
        if action in (3, 6):
            left_thrust += thr_incr
        if action in (4, 6):
            right_thrust += thr_incr
        '''
        # discrete control (the Q-agents, "actions2" scheme:
        # 0: nothing, 1: left roll, 2: right roll, 3: both thrusters up, 4: both down)
        if action == 1:
            left_thrust -= thr_incr
            right_thrust += thr_incr
        elif action == 2:
            left_thrust += thr_incr
            right_thrust -= thr_incr
        elif action == 3:
            left_thrust += thr_incr
            right_thrust += thr_incr
        elif action == 4:
            left_thrust -= thr_incr
            right_thrust -= thr_incr
    left_thrust = max(0, min(left_thrust, maxthr))
    right_thrust = max(0, min(right_thrust, maxthr))
    newstate = [x, y, xc, yc, angle,
                vx, vy, vxc, vyc, vangle,
                left_thrust, right_thrust, done]
    rew, done = reward(newstate)
    newstate[-1] = done
    # end = time.time_ns()
    # print(f"sim time: {end - start} ns")
    return rew, newstate
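
# Integration note: the step above is semi-implicit Euler: accelerations update
# the velocities first, then the new velocities update position and angle, with
# the angle wrapped into (-pi, pi].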


def get_observation(state):
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thr, right_thr, done) = state
    return (xc - x, yc - y, y, sin(angle), cos(angle),
            vx, vy, vxc - vx, vyc - vy, vangle,
            left_thr, right_thr)


def get_observation2(state):
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thr, right_thr, done) = state
    return (xc - x, yc - y, y, sin(angle), cos(angle), vx, vy, vangle, left_thr, right_thr)


def get_observation3(state):
    (x, y, xc, yc, angle,
     vx, vy, vxc, vyc, vangle,
     left_thr, right_thr, done) = state
    return (x - xc, y - yc, y, sin(angle), cos(angle), vx, vy, vangle)
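
# Three observation encodings: get_observation (12 values, includes cursor
# velocity deltas and thrusts), get_observation2 (10, used by the Q-agents),
# and get_observation3 (8, used by the GA nets; note its cursor offset has the
# opposite sign convention from the other two).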


def render_multi_line(screen, font, text, x, y, color, fsize):
    lines = text.splitlines()
    for i, line in enumerate(lines):
        screen.blit(font.render(line, True, color), (x, y + fsize * i))


def drawgrid(cam, step, substeps, wl=1, dark=100, thin=0):
    w, h, scale, x, y = cam
    surf = pg.Surface((w, h), pg.SRCALPHA, 32)
    x -= w / scale / 2
    y -= h / scale / 2
    xstart = floor(x / step) * step - x
    ystart = y - ceil(y / step) * step
    for i in range(ceil(h / step) * (substeps + 1)):
        if ystart + i * step / (substeps + 1) > h:
            break
        weaken = bool(i % (substeps + 1))  # sub-lines are darker and thinner
        pg.draw.line(surf, (255 - weaken * dark, 255 - weaken * dark, 255 - weaken * dark),
                     (0, (ystart + i * step / (substeps + 1)) * scale),
                     (w, (ystart + i * step / (substeps + 1)) * scale), wl - weaken * thin)
    for j in range(ceil(w / step) * (substeps + 1)):
        if xstart + j * step / (substeps + 1) > w:
            break
        weaken = bool(j % (substeps + 1))
        pg.draw.line(surf, (255 - weaken * dark, 255 - weaken * dark, 255 - weaken * dark),
                     ((xstart + j * step / (substeps + 1)) * scale, 0),
                     ((xstart + j * step / (substeps + 1)) * scale, h), wl - weaken * thin)
    return surf


def cam_coords(cam, x, y):
    # world -> screen: camera centered on (x0, y0), y axis flipped
    w, h, scale, x0, y0 = cam
    x = (x - x0) * scale + w / 2
    y = (y0 - y) * scale + h / 2
    return x, y


def render(state, score, screen, cam, scale, w, h, episode_i, font):
    '''Render the drone, its engines, the cursor, and the ground.
    The camera follows the drone; 1 world unit corresponds to `scale` px.
    The background is black, the drone is also black with a thin white outline;
    the engines are outlined too. When they are turned on, little triangles appear,
    which represent air/propellant/whatever. The ground is grey.
    '''

    # Clear the screen
    screen.fill((0, 0, 0))

    global l, eng_l, d, maxthr
    # Unpack the state
    x, y, xc, yc, angle, vx, vy, vxc, vyc, vangle, left_thrust, right_thrust, done = state

    # Draw the ground
    pg.draw.rect(screen, (100, 100, 100), (0, cam_coords(cam, 0, 0)[1], w, h + 1))

    # Draw the grid
    grid = drawgrid(cam, 4, 3, 2, thin=1)
    screen.blit(grid, (0, 0))

    # Calculate the cursor coordinates relative to the camera
    xc, yc = cam_coords(cam, xc, yc)

    # Draw the cursor (clamped to the screen edges)
    pg.draw.circle(screen, (150, 255, 150), (max(min(xc, w), 0), max(min(yc, h), 0)), 0.25 * scale)

    # Draw the drone
    thr_scale = 0.5 * scale
    l_ = l * scale
    eng_l_ = eng_l * scale
    d_ = d * scale
    drone_surf = pg.Surface((l_ + 2 * eng_l_, d_ + 2 * thr_scale), pg.SRCALPHA, 32)

    pg.draw.rect(drone_surf, (255, 255, 255), (0, thr_scale, eng_l_, d_), 2)  # left engine
    pg.draw.rect(drone_surf, (255, 255, 255), (eng_l_, thr_scale, l_, d_), 2)  # base
    pg.draw.rect(drone_surf, (255, 255, 255), (l_ + eng_l_, thr_scale, eng_l_, d_), 2)  # right engine
    pg.draw.polygon(drone_surf, (255, 255, 200),
                    [(0, d_ + thr_scale),
                     (eng_l_ // 2, d_ + (1 + left_thrust / maxthr) * thr_scale),
                     (eng_l_, d_ + thr_scale)])  # left flame
    pg.draw.polygon(drone_surf, (255, 255, 200),
                    [(l_ + eng_l_, d_ + thr_scale),
                     (l_ + eng_l_ + eng_l_ // 2, d_ + (1 + right_thrust / maxthr) * thr_scale),
                     (l_ + 2 * eng_l_, d_ + thr_scale)])  # right flame

    drone_surf = pg.transform.rotate(drone_surf, angle / pi * 180)
    drone_rect = drone_surf.get_rect()
    drone_rect.center = cam_coords(cam, x, y)
    screen.blit(drone_surf, drone_rect)

    # Print information & "HUD"
    # global image
    # screen.blit(image, (0, 500))

    winfo = 3
    trnsprt = 180
    hud = pg.Surface((fontsize * 18 + 2 * winfo, fontsize * 8 + 2 * winfo), pg.SRCALPHA, 32)
    pg.draw.rect(hud, (180, 180, 180, trnsprt), (0, 0, fontsize * 18 + 2 * winfo, fontsize * 8 + 2 * winfo))
    pg.draw.rect(hud, (0, 0, 0, trnsprt), (winfo, winfo, fontsize * 18, fontsize * 8))
    render_multi_line(hud, font,
                      f'Coords: ({x:.2f}, {y:.2f}); angle: {angle:.2f}\n'
                      f'Velocity: ({vx:.2f}, {vy:.2f}); angular: {vangle:.2f}\n'
                      f'Thrusters: left: {left_thrust:.2f}; right: {right_thrust:.2f}\n'
                      f'Episode: {episode_i}; Score: {score:.2f}',
                      20, 10, (255, 255, 255), fontsize * 2)
    screen.blit(hud, (0, 0))
    return screen


def plot_progress(fcsv, n, names, file):
    with open(fcsv, "r") as f:
        reader = csv.reader(f)
        args = [[] for _ in range(n)]
        for row in reader:
            if not row:
                continue
            for i in range(n):
                args[i].append(float(row[i]))
        plots = [plt.scatter(args[0], args[i], s=1 / 4, c=((0.3 + i / (n + 2), 0.6 - i / (n + 3), 0.8),), linewidth=0)
                 for i in range(1, n)]
        plt.legend(plots, names)
        plt.savefig(file, dpi=300)
        plt.clf()


def writedata(file, args):
    with open(file, "a", newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(args)


def save(dronename, drone, episode_i, maxcount, exceeded):
    drone.save(dronename)
    with open(f"{dronename}/config.txt", "w") as f:
        f.write("\n".join([str(episode_i), str(maxcount), str(exceeded), str(drone.gamma),
                           str(drone.eps), str(drone.lr), str(drone.n_state), str(drone.n_actions),
                           str(drone.batch_size), str(drone.n_layers), str(drone.n_neurons),
                           str(drone.eps_dec), str(drone.eps_min), str(drone.mem_size)]))


def load(dronename, cuda):
    with open(f"{dronename}/config.txt", "r") as f:
        (lastprev, maxcount, exceeded, gamma,
         eps, lr, n_state, n_actions,
         batch_size, n_layers, n_neurons,
         eps_dec, eps_end, mem_size) = [float(i) for i in f.readlines()]
        lastprev = int(lastprev) + 1
        maxcount = int(maxcount)
        exceeded = int(exceeded)
        n_state = int(n_state)
        n_actions = int(n_actions)
        batch_size = int(batch_size)
        n_neurons = int(n_neurons)
        n_layers = int(n_layers)
        mem_size = int(mem_size)
        drone = DoubleQAgent(gamma, eps, lr, n_state=n_state, n_actions=n_actions, batch_size=batch_size,
                             n_layers=n_layers, n_neurons=n_neurons, eps_dec=eps_dec, eps_end=eps_end, max_mem=mem_size,
                             use_cuda=cuda)
        drone.load(dronename)
        return drone, lastprev, maxcount, exceeded


def shift(arr, num, fill_value=np.nan):
    """Shift arr by num places (like np.roll, but vacated slots get fill_value)."""
    result = np.empty_like(arr)
    if num > 0:
        result[:num] = fill_value
        result[num:] = arr[:-num]
    elif num < 0:
        result[num:] = fill_value
        result[:num] = arr[-num:]
    else:
        result[:] = arr
    return result
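
# In main() below, shift(scores, -1, score) keeps a fixed-length window of the
# most recent episode scores; np.nanmean over it gives the running average
# (NaN entries mark episodes that have not happened yet).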


def main():
    '''
    observation (not-exactly-state): [xc', yc', h, sin, cos, vx, vy, vxc', vyc', vangle, left_thr, right_thr] - 12
    observation2: [xc', yc', h, sin, cos, vx, vy, vangle, left_thr, right_thr] - 10
    actions = (0:nothing, 1:left-, 2:right-, 3:left+, 4:right+, 5:both-, 6:both+) - 7
    actions2 = (0:nothing, 1:left_roll, 2:right_roll, 3:both+, 4:both-) - 5
    '''
    pg.init()
    font = pg.font.SysFont("arial", fontsize)
    print(T.cuda.is_available())
    scale = 100
    dronename = "double q standart"
    screen = pg.display.set_mode((WIDTH, HEIGHT))
    pg.display.set_caption('Drone thingy')
    clock = pg.time.Clock()
    do_render = False
    do_preload = True
    playable = False
    n_frames = 100_000_000
    use_cuda = True
    if not os.path.exists(dronename):
        os.mkdir(dronename)
    elif not do_preload:
        print("Something already exists here! Aborting")
        return
    if do_preload and os.path.exists(f"{dronename}/data.csv") and os.path.exists(f"{dronename}/net1.pt"):
        drone, lastprev, maxcount, exceeded = load(dronename, use_cuda)
    else:
        lastprev = 0
        maxcount = 2000  # frame limit per episode
        exceeded = 0
        drone = DoubleQAgent(0.99, 1, 0.001, n_state=10, n_actions=5, batch_size=256, n_layers=3,
                             n_neurons=64, eps_dec=1e-5, eps_end=0.03, use_cuda=use_cuda, max_mem=100_000)
    meanoverwhat = 100  # window size for the running averages
    scores = np.empty(meanoverwhat, dtype=np.float32)
    lengths = np.empty(meanoverwhat, dtype=np.float32)
    scores[:] = np.nan
    lengths[:] = np.nan
    currexceeded = 0
    _, state = simstep([True])
    ep = lastprev
    score = 0
    counter = 0
    # cam = (WIDTH, HEIGHT, scale, state[0], state[1])
    for i in range(n_frames):
        for event in pg.event.get():
            if event.type == pg.QUIT:
                save(dronename, drone, ep, maxcount, exceeded)
                plot_progress(f"{dronename}/data.csv", 3, ["score", "average score"],
                              f"{dronename}/plot_so_far.png")
                return
            if event.type == pg.KEYDOWN:
                if event.key == pg.K_r:
                    do_render = True
                elif event.key == pg.K_SPACE:
                    do_render = False

        observation = get_observation2(state)
        action = drone.policy(observation)

        # curriculum: first learn to hover in place, then to chase the cursor
        if ep < 10000:
            reward, state = simstep(state, playable, action, reward=reward_stay)
        else:
            reward, state = simstep(state, playable, action, reward=reward_f)
        if ep == 10000:
            drone.eps = 0.5  # re-raise exploration when the task switches

        score += reward
        next_observation = get_observation2(state)
        drone.store_transition(observation, action, reward, next_observation, state[-1])
        drone.learn()

        if do_render:
            cam = (WIDTH, HEIGHT, scale, state[0], max(2, state[1]))
            screen = render(state, score, screen, cam, scale, WIDTH, HEIGHT, ep, font)
            pg.display.flip()
            clock.tick(60)

        if counter > maxcount:
            state[-1] = True  # force episode end when it runs too long
            currexceeded += 1
        counter += 1
        if state[-1]:
            scores = shift(scores, -1, score)
            lengths = shift(lengths, -1, counter)
            avg_score = np.nanmean(scores)
            avg_len = np.nanmean(lengths)
            writedata(f"{dronename}/data.csv", [ep, score, avg_score])
            if not ep % 50:
                exceeded += currexceeded
                print(f"ep {ep}:\tscore:{score:.2f}\tav.sc:{avg_score:.2f}\t"
                      f"ep.len:{counter}\tav.ep.len:{avg_len:0.0f}\teps:{drone.eps:.3f}\t"
                      f"exceeded:{currexceeded}\t(total:{exceeded})\t"
                      f"steps:{drone.mem_countr}\t({drone.mem_countr / 2 / drone.mem_size:.2f})".expandtabs(16))
                currexceeded = 0
                if not ep % 500:
                    print("saving...")
                    plot_progress(f"{dronename}/data.csv", 3, ["score", "average score"],
                                  f"{dronename}/plot_{ep / 1000:.1f}k.png")
                    drone.save(dronename, f"{ep / 1000:.1f}k")
                    save(dronename, drone, ep, maxcount, exceeded)
            score = 0
            counter = 0
            ep += 1

    save(dronename, drone, n_frames - 1, maxcount, exceeded)
    return


def showmain():
    drones = ["mltthr alg 1/model_1111.pt", "mltthr alg 1/model_2222.pt", "mltthr alg 1/model_2525.pt"]
    neurons = [48, 48, 48]
    for i in range(len(drones)):
        if i == -1:  # disabled legacy architecture (11 inputs, 7 discrete actions)
            model = nn.Sequential(
                nn.Linear(11, neurons[i]),
                nn.ReLU(),
                nn.Linear(neurons[i], 7))
        else:
            model = nn.Sequential(
                nn.Linear(8, neurons[i]),
                nn.ReLU(),
                nn.Linear(neurons[i], neurons[i]),
                nn.ReLU(),
                nn.Linear(neurons[i], 2))
        model.load_state_dict(T.load(drones[i]))
        quickrender(model, 1000 if i == len(drones) - 1 else 5)


if __name__ == '__main__':
    showmain()