Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Double-pole balancing experiment using a feed-forward neural network.
- """
- from __future__ import print_function
- import os
- import pickle
- import numpy as np
- import cart_pole
- from neat import nn, parallel, population
- from neat.config import Config
- from math import radians as rad
# Number of simulation runs used to score each network; evaluate_genome
# keeps the worst of them.
runs_per_net = 5

# Upper bound on the number of control steps in a single run.
# NOTE(review): with dpb.time_step = 0.01 and two update_state() calls per
# step this looks like more than one minute of simulated time -- confirm how
# far update_state() advances the clock.
num_steps = 60000
def dpb_factory():
    """Build a freshly initialised two-pole cart simulation.

    Creates a ``cart_pole.PoledCart`` and overwrites its pole list, cart
    state and physical constants with the fixed starting configuration used
    by this experiment.

    Returns:
        The configured ``cart_pole.PoledCart`` instance.
    """
    cart = cart_pole.PoledCart(2)
    cart.pole_number = 2
    pole_count = cart.pole_number

    # Per-pole starting values; index 0 is the first pole, index 1 the second.
    masses = [0.1] * pole_count
    angles = [0.0, rad(1.0)]  # second pole starts at 1 degree, in radians
    h_lens = [0.1, 1.0]
    accelerations = [0.0] * pole_count
    velocities = [0.0] * pole_count

    # Pole takes (angle, velocity, acceleration, mass, h_len) positionally.
    cart.poles = [
        cart_pole.Pole(angle, velocity, acceleration, mass, h_len)
        for angle, velocity, acceleration, mass, h_len
        in zip(angles, velocities, accelerations, masses, h_lens)
    ]

    # Cart starts at rest in the middle of the track with no force applied.
    cart.cart_pos = 0.0
    cart.cart_vel = 0.0
    cart.cart_acc = 0.0
    cart.cart_mass = 1.0
    cart.time = 0.0
    cart.applied_force = 0.0

    # Limits and simulation parameters.
    cart.track_limit = 2.4
    cart.p_failure_angle = rad(36)
    cart.time_step = 0.01
    cart.cart_fric = 0.05
    cart.p_fric = 0.000002
    cart.stop_at_zero_deg = True
    return cart
def get_normilized_dpb_input_with_vel(dpb):
    """Return the six scaled state readings, velocities included.

    Each raw reading is divided by a fixed scale and the result is clipped
    to the closed interval [-1, 1].

    Args:
        dpb: Simulation object exposing ``cart_pos``, ``cart_vel``,
            ``track_limit``, ``p_failure_angle`` and a ``poles`` sequence
            whose first two items have ``angle`` and ``vel`` attributes.

    Returns:
        A NumPy array ordered as: cart position, cart velocity, pole 0
        angle, pole 0 velocity, pole 1 angle, pole 1 velocity.
    """
    first_pole = dpb.poles[0]
    second_pole = dpb.poles[1]
    angle_scale = dpb.p_failure_angle

    scaled = [
        dpb.cart_pos / dpb.track_limit,
        dpb.cart_vel / 4.0,
        first_pole.angle / angle_scale,
        first_pole.vel / 5.0,
        second_pole.angle / angle_scale,
        second_pole.vel / 4.0,
    ]
    return np.clip(scaled, -1, 1)
def get_normilized_dpb_input_no_vel(dpb):
    """Return the three scaled state readings, without any velocities.

    Each raw reading is divided by its limit and the result is clipped to
    the closed interval [-1, 1].

    Args:
        dpb: Simulation object exposing ``cart_pos``, ``track_limit``,
            ``p_failure_angle`` and a ``poles`` sequence whose first two
            items have an ``angle`` attribute.

    Returns:
        A NumPy array ordered as: cart position, pole 0 angle, pole 1 angle.
    """
    angle_scale = dpb.p_failure_angle
    scaled = [
        dpb.cart_pos / dpb.track_limit,
        dpb.poles[0].angle / angle_scale,
        dpb.poles[1].angle / angle_scale,
    ]
    return np.clip(scaled, -1, 1)
- # Use the NN network phenotype and the discrete actuator force function.
def evaluate_genome(g):
    """Score a genome by how many steps its network keeps the cart running.

    A feed-forward network is built from the genome and used to drive a
    fresh simulation from ``dpb_factory()`` for each of ``runs_per_net``
    runs. A run ends when the simulation reports ``failed`` or after
    ``num_steps`` steps.

    Args:
        g: Genome accepted by ``nn.create_feed_forward_phenotype``.

    Returns:
        The lowest step count (as a float) reached across all runs.
    """
    net = nn.create_feed_forward_phenotype(g)

    # NOTE(review): dpb_factory() uses a fixed starting state, so the runs
    # may all be identical -- confirm whether repeating them adds anything.
    survived_per_run = []
    for _run in range(runs_per_net):
        cart = dpb_factory()

        steps_survived = 0.0
        for _step in range(num_steps):
            # Six inputs: this variant feeds velocities as well as positions.
            observation = get_normilized_dpb_input_with_vel(cart)
            output = net.serial_activate(observation)

            # Turn the network output into a force and apply it to the cart.
            cart.applied_force = cart_pole.discrete_actuator_force(output)

            # The simulation is advanced twice for every network decision.
            cart.update_state()
            cart.update_state()

            if cart.failed:
                break
            steps_survived += 1.0

        survived_per_run.append(steps_survived)

    # The genome's fitness is its worst performance across all runs.
    return min(survived_per_run)
# Script entry point: evolve a population, then save and print the winner.
#
# The run is guarded by ``__name__ == '__main__'`` because the parallel
# evaluator hands ``evaluate_genome`` to worker processes. On platforms where
# multiprocessing starts workers by re-importing this module, unguarded
# top-level code would try to launch the whole run again inside every worker.
if __name__ == '__main__':
    # Load the config file, which is assumed to live in
    # the same directory as this script.
    local_dir = os.path.dirname(__file__)
    config = Config(os.path.join(local_dir, 'nn_config'))

    pop = population.Population(config)
    # NOTE(review): 4 is presumably the worker count and 100 the number of
    # generations -- confirm against the neat version in use.
    pe = parallel.ParallelEvaluator(4, evaluate_genome)
    pop.run(pe.evaluate, 100)

    print('Number of evaluations: {0:d}'.format(pop.total_evaluations))

    # Save the winner. The file is written relative to the current working
    # directory, not to the script directory used for the config.
    winner = pop.statistics.best_genome()
    with open('nn_winner_genome', 'wb') as f:
        pickle.dump(winner, f)

    print(winner)
Advertisement
Add Comment
Please, Sign In to add comment