Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /************************************************************************************************************************************************
- *
- * This program attemps to write programs of its own in the brainfuck language, using a genetic algorithm.
- * Since brainfuck isn't fully defined, some assumptions had to be made:
- *
- * -The size of the 'tape' holding cells will be fixed-length (1000 cells, chosesn arbitrarily). This is primarily for a speed boost.
- * -Each cell can hold an unsigned char (so up to a value of 255). Going over will cause wrap-around.
- *
- * A set of inputs and outputs will be fed to each potential solution.
- * If the genome can get matching output for the specified inputs, it will have written the correct program.
- * For example, if you want a brainfuck program that halves integers, you can feed the GA the following sets of inputs and outputs:
- *
- * I: 10, O: 5
- * I: 22, O: 11
- * I: 34, O: 17
- *
- * The fitness function will take into account how close the output matches, and how concise the program is.
- * Shorter programs receive a slight bonus.
- *
- *************************************************************************************************************************************************/
- #include <iostream>
- #include <string>
- #include <map>
- #include <cstdlib>
- #include <ctime>
- #include <cmath>
- #include "interpreter.h"
- /* Don't modify this group of constants. */
- const int CHAR_SIZE = 255; // The max value of a 'cell' in the memory tape
- const int NUM_MUTATIONS = 3; // The number of types of mutations: (ADD, DELETE, CHANGE)
- const char INSTRUCTIONS[] = {'+', '-', '>', '<', '[', ']', '.'}; // The list of brainfuck instructions.
- const int NUM_INSTRUCTIONS = (sizeof(INSTRUCTIONS) / sizeof(INSTRUCTIONS[0])); // Holds the number of instructions allowed.
- const int NUM_CHILDREN = 2; // Number of children two parents create upon reproduction.
- /* Modify any constants below. */
- const int MAX_GENERATIONS = 1000000000; // The number of generations to repeat the genetic algorithm process.
- const int POP_SIZE = 5; // The size of the population. This always remains the same between generations.
- const int MIN_PROGRAM_SIZE = 10; // The minimum size a possible program can be.
- const int MAX_PROGRAM_SIZE = 500; // The maximum size a possible program can be.
- const double MUTATION_RATE = 0.01; // The chance of a 'gene' in a child being mutated
- const double LENGTH_PENALTY = 0.001; // The size of the program is multiplied by this then added to score.
- const int DISPLAY_RATE = 10000; // How often to display the best program so far.
- const std::string GOAL_OUTPUT = "Artificial";
- const int GOAL_OUTPUT_SIZE = GOAL_OUTPUT.length();
- /*const std::string INPUT_SETS[] = {"10", "4", "16"};
- const int NUM_INPUT_SETS = 3;*/
- /* Generates a random double between low and high. */
- double get_random(double low, double high)
- {
- return low + static_cast<double>(rand()) / static_cast<double>(static_cast<double>(RAND_MAX) / (high - low));
- }
- int get_random_int(int low, int high)
- {
- return (rand() % (high - low + 1)) + low;
- }
- /* Returns a random brainfuck instruction. */
- char get_random_instruction()
- {
- return INSTRUCTIONS[get_random_int(0, NUM_INSTRUCTIONS - 1)];
- }
- void add_instruction(std::string &program, int index)
- {
- std::string instr(1, get_random_instruction()); // Need to convert char to string for use by insert
- if((program.length() + 1) <= MAX_PROGRAM_SIZE)
- program.insert(index, instr);
- }
- void remove_instruction(std::string &program, int index)
- {
- // Subtract 2 instead of 1 to account for the off-by-one difference between a string length and the last element index
- if((program.length() - 2) >= MIN_PROGRAM_SIZE)
- program.erase(index, 1);
- }
- void mutate_instruction(std::string &program, int index)
- {
- program[index] = get_random_instruction();
- }
- /* Creates a random program by first randomly determining its size and then adding that many instructions randomly. */
- std::string create_random_program()
- {
- std::string program;
- int program_size = get_random_int(MIN_PROGRAM_SIZE, MAX_PROGRAM_SIZE);
- for(int i = 1; i <= program_size; ++i)
- program += get_random_instruction();
- return program;
- }
- /* Creates the first generation's population by randomly creating programs. */
- void initialize_population(std::string programs[])
- {
- for(int i = 0; i < POP_SIZE; ++i)
- programs[i] = create_random_program();
- }
- /* The Fitness Function. Determines how 'fit' a program is using a few different criteria. */
- double calculate_fitness(const std::string &program, Interpreter &bf, std::map<std::string, double> &programs_scored)
- {
- // If program is in the cache, return that value instead of recalculating.
- if(programs_scored.find(program) != programs_scored.end()) return programs_scored[program];
- double score = 0;
- double max_score = GOAL_OUTPUT_SIZE * CHAR_SIZE; // The score of the worse program possible.
- std::string output = bf.run(program, "");
- std::string min_str = (output.length() < GOAL_OUTPUT.length()) ? output : GOAL_OUTPUT;
- std::string max_str = (output.length() >= GOAL_OUTPUT.length()) ? output : GOAL_OUTPUT;
- // The more each character of output is similar to its corresponding character in the goal output, the higher the score.
- for(int i = 0; i < max_str.length(); ++i)
- {
- int output_char = (i < min_str.length()) ? min_str[i] : max_str[i] + CHAR_SIZE;
- score += abs(output_char - max_str[i]);
- }
- score += (program.length() * LENGTH_PENALTY); // Impose a slight penalty for longer programs.
- // Impose a large penalty for error programs, but still allow them a chance at reproduction for genetic variation.
- if(output == "Error") return 1;//return (max_score * 0.01);
- // Subtract from max score because the lower the score, the better.
- double final_score = max_score - score;
- programs_scored[program] = final_score; // Cache the result to save for later.
- return final_score;
- }
- /* Generates a fitness score for each program in the population.
- This is done by running the program through the brainfuck interpreter and scoring its output.
- Also returns the best program produced. */
- std::string score_population(std::string programs[], double scores[], int &worst_index, Interpreter &bf, std::map<std::string, double> &programs_scored)
- {
- std::string best_program;
- double best_score = 0;
- double worst_score = 999;
- for(int i = 0; i < POP_SIZE; ++i)
- {
- scores[i] = calculate_fitness(programs[i], bf, programs_scored);
- if(scores[i] > best_score)
- {
- best_program = programs[i];
- best_score = scores[i];
- }
- else if(scores[i] < worst_score)
- {
- worst_index = i;
- worst_score = scores[i];
- }
- }
- return best_program;
- }
- /* Adds every program's fitness score together. */
- double pop_score_total(const double scores[])
- {
- double total = 0;
- for(int i = 0; i < POP_SIZE; ++i)
- total += scores[i];
- return total;
- }
- /* Selects a parent to mate using fitness proportionate selection. Basically, the more fit a program it is, the more likely to be selected. */
- std::string select_parent(std::string programs[], double scores[], std::string other_parent = "")
- {
- double program_chances[POP_SIZE]; // Holds each program's chance of being selected (a # between 0 and 1)
- double score_total = pop_score_total(scores); // The sum of all programs' fitness scores
- double rand_val = get_random(0.0, 1.0);
- for(int i = 0; i < POP_SIZE; ++i)
- {
- double prev_chance = ((i - 1) < 0) ? 0 : program_chances[i - 1];
- // We add the previous program's chance to this program's chance because that is its range of being selected.
- // The higher the fitness score, the bigger this program's range is.
- program_chances[i] = (scores[i] / score_total) + (prev_chance);
- // Need to subtract a small amount from rand_val due to floating-point precision errors. Without it, equality check could fail.
- if(program_chances[i] >= (rand_val - 0.001) && (programs[i] != other_parent))
- return programs[i];
- }
- // If the other parent was the last program in the list, we might get here.
- // In that case, just return the 1st program.
- return programs[0];
- /* // Debugging output. Function should never get to this part.
- std::cout << "Score total: " << score_total << std::endl;
- std::cout << "Last score: " << scores[POP_SIZE - 1] << std::endl;
- std::cout << "Rand val: " << rand_val << std::endl;
- std::cout << "Last chance: " << program_chances[POP_SIZE - 1] << std::endl;
- std::cin.get();*/
- }
- std::string mutate(std::string child)
- {
- int program_length = child.length();
- for(int i = 0; i < child.length(); ++i)
- {
- bool do_mutate = MUTATION_RATE >= get_random(0.0, 1.0) ? true : false;
- if(do_mutate)
- {
- int mutation_type = get_random_int(1, NUM_MUTATIONS);
- switch(mutation_type)
- {
- case 1:
- mutate_instruction(child, i);
- break;
- case 2:
- add_instruction(child, i);
- break;
- case 3:
- remove_instruction(child, i);
- break;
- default:
- break;
- }
- }
- }
- return child;
- }
- /* Performs crossover between two parents to produce two children. */
- void mate(std::string parent1, std::string parent2, std::string children[])
- {
- // We need to find which program is biggest
- std::string min_str = (parent1.length() < parent2.length()) ? parent1 : parent2;
- std::string max_str = (parent1.length() >= parent2.length()) ? parent1 : parent2;
- // Determine a crossover point at random
- int crosspoint = get_random_int(1, max_str.length() - 1);
- // Find the substring of the program after the crossover point
- std::string max_str_contrib = max_str.substr(crosspoint);
- // Then erase after that point
- max_str.erase(crosspoint);
- // If the crosspoint is less than the length of the smaller program, then we need to combine part of it with the larger program.
- // If not, then we do nothing and just take part of the larger program and add it to it.
- if(crosspoint <= min_str.length())
- {
- max_str += min_str.substr(crosspoint);
- min_str.erase(crosspoint);
- }
- // Add the 2nd part of the larger program to the smaller program.
- min_str += max_str_contrib;
- // Call the mutate function on the children which has a small chance of actually causing a mutation.
- children[0] = mutate(min_str);
- children[1] = mutate(max_str);
- }
- bool program_exists(std::string program, std::string programs[])
- {
- for(int i = 0; i < POP_SIZE; ++i)
- {
- if(program == programs[i])
- return true;
- }
- return false;
- }
- void replace_program(std::string parent, std::string child, std::string programs[])
- {
- for(int i = 0; i < POP_SIZE; ++i)
- {
- if(parent == programs[i])
- {
- programs[i] = child;
- break;
- }
- }
- }
- int main()
- {
- // Initialize the brainfuck interpreter.
- Interpreter brainfuck;
- srand(time(0));
- std::string programs[POP_SIZE]; // All programs of the current generation.
- double fitness_scores[POP_SIZE]; // The fitness scores of all programs.
- // Below variables used for memoization; cache results that have already been computed
- // Not sure exactly if it is any fater lolz, due to having to search that the program is in the cache every iteration
- std::map<std::string, double> programs_scored;
- // Generates POP_SIZE number of random programs
- initialize_population(programs);
- std::string best_program;
- for(int g = 0; g < MAX_GENERATIONS; ++g)
- {
- int worst_program_index = 0;
- // Generate the scores of each program.
- best_program = score_population(programs, fitness_scores, worst_program_index, brainfuck, programs_scored);
- // Select two parents randomly using fitness proportionate selection
- std::string parent1 = select_parent(programs, fitness_scores);
- std::string parent2 = select_parent(programs, fitness_scores, parent1);
- // Mate them to create children
- std::string children[NUM_CHILDREN];
- mate(parent1, parent2, children);
- // Replace the parent programs with the children. We replace the parents to prevent premature convergence.
- // This works because by replacing the parents, which are most similar to the children, genetic diversity is maintained.
- // If the parents were not replaced, the population would quickly fill with similar genetic information.
- replace_program(parent1, children[0], programs);
- replace_program(parent2, children[1], programs);
- // Remove the parent programs from the cache to save space
- programs_scored.erase(parent1);
- programs_scored.erase(parent2);
- // Replace the worst program with the best program if it was replaced by its child. (Elitism).
- // This is done to ensure the best program is never lost.
- if(!program_exists(best_program, programs))
- programs[worst_program_index] = best_program;
- // Report on the current best program every so often.
- if(!(g % DISPLAY_RATE))
- {
- std::cout << "\n\nBest program evolved so far: " << std::endl;
- std::cout << best_program << std::endl;
- std::cout << "\nOutput: " << brainfuck.run(best_program, "") << std::endl;
- }
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement