Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //
- // weasel.c - A variant of Dawkins' "Weasel" program for modeling drift vs selection.
- //
- // Author: keiths at www.theskepticalzone.com
- //
- // DOS/Windows modifications by petrushka at The Skeptical Zone.
- //
- // Compile under Linux using 'gcc -std=gnu99 -lm weasel.c -o weasel'.
- // Add a '#define WINDOWS' before compiling under Windows or DOS.
- //
- #define VERSION "02232016"
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #include <unistd.h>
- #include <ctype.h>
- #include <math.h>
- #ifdef WINDOWS
- #include <conio.h>
- #else
- #include <sys/select.h>
- #include <termios.h>
- #endif
- // adjustable parameters
- #define POPULATION_SIZE 50000 // total population size
- #define NUM_SURVIVORS 1 // number of survivors per generation (must be less than POPULATION_SIZE)
- #define GENOME_LEN 28 // number of loci in each genome (each locus is one character)
- #define MUTATION_RATE 0.01 // probability that a locus is mutated per generation
- #define SELECTION_COEFF 5.0 // selection coefficient
- #define DISPLAY_INTERVAL 10 // display results every n generations, where n == DISPLAY_INTERVAL
- #define PAUSE_TIME 50 // pause time in milliseconds
- #define GENOMES_TO_DISPLAY 1 // in STEP_MODE, number of genomes to display at each step
- #define STEP_MODE 1 // if set, program will pause every n generations and display the genomes
- #define ENABLE_LATCHING 0 // if set, enable latching
- #define ENABLE_SELECTION 1 // if set, enable selection; otherwise, select survivors with no bias
- #define CHARSET_SIZE 27 // 26 uppercase letters plus the space character
- #define NUM_LINES 300 // number of lines in the program's display "window"
- #define CHARS_PER_LINE 120 // for each line in the "window"
- long long generation; // generation count
- long long histogram[GENOME_LEN+1]; // histogram of Hamming distance from the target
- double mutation_rate;
- double selection_coefficient;
- double floating_num_survivors;
- int num_survivors;
- char buffer[20];
- // A genome consists of an array of loci, the associated fitness value, and a field indicating the number of matches
- // between the genome and the target phrase.
- typedef struct {
- char locus[GENOME_LEN];
- double fitness;
- int matches;
- } genome_t;
- genome_t target; // target phrase
- genome_t genome_array[POPULATION_SIZE]; // genomes of all members of the population
- char weasel_to_ascii(char c) {
- switch(c) {
- case 0: return ' ';
- case 1: return 'A';
- case 2: return 'B';
- case 3: return 'C';
- case 4: return 'D';
- case 5: return 'E';
- case 6: return 'F';
- case 7: return 'G';
- case 8: return 'H';
- case 9: return 'I';
- case 10: return 'J';
- case 11: return 'K';
- case 12: return 'L';
- case 13: return 'M';
- case 14: return 'N';
- case 15: return 'O';
- case 16: return 'P';
- case 17: return 'Q';
- case 18: return 'R';
- case 19: return 'S';
- case 20: return 'T';
- case 21: return 'U';
- case 22: return 'V';
- case 23: return 'W';
- case 24: return 'X';
- case 25: return 'Y';
- case 26: return 'Z';
- }
- }
- char ascii_to_weasel(char c) {
- switch(c) {
- case ' ': return 0;
- case 'A': return 1;
- case 'B': return 2;
- case 'C': return 3;
- case 'D': return 4;
- case 'E': return 5;
- case 'F': return 6;
- case 'G': return 7;
- case 'H': return 8;
- case 'I': return 9;
- case 'J': return 10;
- case 'K': return 11;
- case 'L': return 12;
- case 'M': return 13;
- case 'N': return 14;
- case 'O': return 15;
- case 'P': return 16;
- case 'Q': return 17;
- case 'R': return 18;
- case 'S': return 19;
- case 'T': return 20;
- case 'U': return 21;
- case 'V': return 22;
- case 'W': return 23;
- case 'X': return 24;
- case 'Y': return 25;
- case 'Z': return 26;
- }
- }
- // Swap two genomes given their indices.
- void swap_genomes(int g1, int g2) {
- genome_t temp_genome;
- if (g1 == g2) {
- return;
- } else {
- temp_genome = genome_array[g1];
- genome_array[g1] = genome_array[g2];
- genome_array[g2] = temp_genome;
- }
- }
- // Thoroughly and randomly scramble the population within the genome array.
- void scramble_genomes() {
- for (int i=0; i<6; i++) {
- for (int j=0; j<POPULATION_SIZE; j++) {
- swap_genomes(j, rand()%POPULATION_SIZE);
- }
- }
- }
- // Calculate a genome's fitness by determining the number of loci at which
- // it matches the target, then applying the formula (1+s)^m where s is the selection
- // coefficient and m is the number of matches.
- double fitness(genome_t *genome) {
- genome->matches = 0;
- // scan the entire genome, counting the number of loci that match the target
- for (int i=0; i < GENOME_LEN; i++) {
- if (genome->locus[i] == target.locus[i]) ++genome->matches;
- }
- return pow(1.0+selection_coefficient, genome->matches);
- }
- // Display a genome as a character string.
- void display_genome(genome_t *genome) {
- for (int i=0; i < GENOME_LEN; i++) {
- putchar(weasel_to_ascii(genome->locus[i]));
- }
- }
- // Convert an ascii string to a Weasel genome
- void weaselize (char *ascii_str, genome_t *genome) {
- for (int i=0; i<GENOME_LEN; i++) {
- genome->locus[i] = ascii_to_weasel(ascii_str[i]);
- }
- }
- // Move cursor to the home position
- void cursor_home() {
- #ifdef WINDOWS
- gotoxy(0,0);
- #else
- printf("\x1b[H");
- #endif
- }
- // Overwrite everything on the screen with spaces
- void clear_screen() {
- #ifdef WINDOWS
- clrscr();
- #else
- cursor_home();
- for (int i=0; i < NUM_LINES; i++) {
- for (int j=0; j < CHARS_PER_LINE; j++) {
- putchar(' ');
- }
- printf("\n\r");
- }
- #endif
- }
- // Mutate 'old' genome, placing results in 'new'
- void mutate(genome_t *old, genome_t *new) {
- for (int i=0; i < GENOME_LEN; i++) {
- if ((double)rand()/(double)RAND_MAX < mutation_rate * 27.0/26.0 && (!ENABLE_LATCHING || old->locus[i] != target.locus[i])) {
- new->locus[i] = (char)(rand()%CHARSET_SIZE); // mutate this locus
- } else {
- new->locus[i] = old->locus[i]; // don't mutate
- }
- }
- new->fitness = fitness(new);
- }
- // Initialize a genome with a random string
- void init_genome_rand(genome_t *genome) {
- for (int i=0; i < GENOME_LEN; i++) {
- genome->locus[i] = (char)(rand()%CHARSET_SIZE);
- }
- genome->fitness = fitness(genome);
- }
- // Comparison function for use by qsort() library routine
- int compare_fitness (const void * elem1, const void * elem2) {
- if (((genome_t *)elem2)->fitness > ((genome_t *)elem1)->fitness) return 1;
- if (((genome_t *)elem2)->fitness == ((genome_t *)elem1)->fitness) return 0;
- return -1;
- }
- // Use weighted random selection to choose offspring for the survivor slots
- void pick_survivors() {
- double total_fitness = 0.0;
- double cumulative_fitness;
- double rand_num;
- int j;
- // add up all fitnesses
- for (int i=0; i<POPULATION_SIZE; i++) {
- total_fitness += genome_array[i].fitness;
- }
- // for each survivor slot
- for (int i=0; i<num_survivors; i++) {
- cumulative_fitness = 0.0;
- // generate a random number in the range 0-total_fitness
- rand_num = (double)rand()/(double)RAND_MAX * total_fitness;
- // from the remaining offspring, select the one whose range is hit by the random number
- for (j=i; j<POPULATION_SIZE; j++) {
- cumulative_fitness += genome_array[j].fitness;
- if (cumulative_fitness >= rand_num) {
- break;
- }
- }
- // swap it with the one in the survivor slot
- swap_genomes(i,j);
- // adjust the total fitness to reflect only the remaining offspring
- total_fitness -= genome_array[i].fitness;
- }
- }
- void display_histogram() {
- long long max = 0;
- long long width;
- printf("Histogram of Hamming distances:\n\n\r");
- for (int i=0; i<=GENOME_LEN; i++) {
- if (histogram[i] > max) {
- max = histogram[i];
- }
- }
- if (max > 0) {
- for (int i=0; i<=GENOME_LEN; i++) {
- printf("%2d: %8lld ", i, histogram[i]);
- width = 60 * histogram[i]/max;
- for (int j=0; j<width; j++) {
- printf("X");
- }
- printf("\n\r");
- }
- }
- printf("\n\r");
- }
- #ifndef WINDOWS
- //
- // From http://stackoverflow.com/questions/448944/c-non-blocking-keyboard-input
- //
- struct termios orig_termios;
- void reset_terminal_mode()
- {
- tcsetattr(0, TCSANOW, &orig_termios);
- }
- void set_conio_terminal_mode()
- {
- struct termios new_termios;
- /* take two copies - one for now, one for later */
- tcgetattr(0, &orig_termios);
- memcpy(&new_termios, &orig_termios, sizeof(new_termios));
- /* register cleanup handler, and set the new terminal mode */
- atexit(reset_terminal_mode);
- cfmakeraw(&new_termios);
- tcsetattr(0, TCSANOW, &new_termios);
- }
- int kbhit()
- {
- struct timeval tv = { 0L, 0L };
- fd_set fds;
- FD_ZERO(&fds);
- FD_SET(0, &fds);
- return select(1, &fds, NULL, NULL, &tv);
- }
- int getch()
- {
- int r;
- unsigned char c;
- if ((r = read(0, &c, sizeof(c))) < 0) {
- return r;
- } else {
- return c;
- }
- }
- #endif
- // Read a double-precision floating point number from keyboard input.
- // Can't use scanf here because of the non-blocking I/O.
- void get_double(double *ptr) {
- int i = 0;
- char c;
- do {
- while (!kbhit()) { // wait for keystroke
- ;
- }
- c = getch();
- putchar(c);
- fflush(stdout);
- buffer[i++] = c;
- } while (buffer[i-1] != '\r' && i < sizeof(buffer)); // accumulate characters in buffer until CR
- buffer[i-1] = '\n';
- i=0;
- fflush(stdout);
- sscanf(buffer, "%lf", ptr);
- }
- int main() {
- int i;
- int selection_enabled = ENABLE_SELECTION;
- char ascii_target[GENOME_LEN+1];
- char c;
- #ifndef WINDOWS
- set_conio_terminal_mode();
- #endif
- printf("\n\n\n\n\r");
- // set cursor to home position, clear screen, and rehome
- clear_screen();
- cursor_home();
- // seed the random number generator with the current time
- srand(time(NULL));
- // set up ascii version of initial fitness target
- ascii_target[0] = 'M';
- ascii_target[1] = 'E';
- ascii_target[2] = 'T';
- ascii_target[3] = 'H';
- ascii_target[4] = 'I';
- ascii_target[5] = 'N';
- ascii_target[6] = 'K';
- ascii_target[7] = 'S';
- ascii_target[8] = ' ';
- ascii_target[9] = 'I';
- ascii_target[10] = 'T';
- ascii_target[11] = ' ';
- ascii_target[12] = 'I';
- ascii_target[13] = 'S';
- ascii_target[14] = ' ';
- ascii_target[15] = 'L';
- ascii_target[16] = 'I';
- ascii_target[17] = 'K';
- ascii_target[18] = 'E';
- ascii_target[19] = ' ';
- ascii_target[20] = 'A';
- ascii_target[21] = ' ';
- ascii_target[22] = 'W';
- ascii_target[23] = 'E';
- ascii_target[24] = 'A';
- ascii_target[25] = 'S';
- ascii_target[26] = 'E';
- ascii_target[27] = 'L';
- // set up weasel version of the fitness target
- weaselize(ascii_target, &target);
- // randomly initialize the genomes of the entire population
- for (i=0; i < POPULATION_SIZE; i++) {
- init_genome_rand(&genome_array[i]);
- }
- printf("\n\n\rInitial Organism 0 Genome:\n\n\r");
- display_genome(&genome_array[0]);
- printf("\n\n\n\r");
- generation = 0;
- mutation_rate = MUTATION_RATE;
- selection_coefficient = SELECTION_COEFF;
- num_survivors = NUM_SURVIVORS;
- // initialize the histogram to zeroes
- for (int i=0; i<=GENOME_LEN; i++) {
- histogram[i] = 0;
- }
- while (1) {
- // preserve the survivors, but convert the rest into mutated copies of the survivors
- for (i=0; i < POPULATION_SIZE - num_survivors; i++) {
- mutate(&genome_array[i%num_survivors], &genome_array[i+num_survivors]);
- }
- // now mutate the survivors themselves
- for (i=0; i<num_survivors; i++) {
- mutate(&genome_array[i], &genome_array[i]);
- }
- if (selection_enabled) {
- // use weighted random selection to choose offspring for the survivor slots
- pick_survivors();
- } else {
- // randomly scramble the genome array
- scramble_genomes();
- }
- ++histogram[GENOME_LEN-genome_array[0].matches];
- ++generation;
- // in step mode, pause and display all genomes periodically
- if (STEP_MODE && generation % DISPLAY_INTERVAL == 0) {
- clear_screen();
- cursor_home();
- printf("Weasel version %s\n\n\r", VERSION);
- printf("Press 'h' at any time for help\n\n\n\n\r");
- printf("Target Phrase: ");
- printf("%s", ascii_target);
- printf("\n\n\rGeneration: %lld Number of survivors per generation: %d\n\r", generation, num_survivors);
- printf("\n\rSelection: %s Coefficient: %1.2lf Mutation rate: %1.2lf\n\n\r", selection_enabled ? "ON" : "OFF", selection_coefficient, mutation_rate);
- for (i = 0; i < GENOMES_TO_DISPLAY; i++) {
- printf("Organism %d Fitness %1.2le Hamming distance %d\n\n\r", i, genome_array[i].fitness, GENOME_LEN-genome_array[0].matches);
- display_genome(&genome_array[i]);
- printf("\r\n\n");
- }
- display_histogram();
- #ifdef WINDOWS
- sleep(PAUSE_TIME); // pause briefly before continuing
- #else
- usleep(1000*PAUSE_TIME); // pause briefly before continuing
- #endif
- }
- if (!STEP_MODE && generation % DISPLAY_INTERVAL == 0) {
- printf("generation = %lld fitness = %1.2le Hamming distance = %d\n\r", generation, genome_array[0].fitness, GENOME_LEN-genome_array[0].matches);
- }
- if (kbhit()) { // keystroke detected
- switch(getch()) {
- case 'c': // clear the histogram
- for (int i=0; i<=GENOME_LEN; i++) {
- histogram[i] = 0;
- }
- break;
- case 'f': // change the selection coefficient
- printf("Enter new selection coefficient: ");
- fflush(stdout);
- get_double(&selection_coefficient);
- break;
- case 'h': // print help message
- printf("Commands:\r\n");
- printf(" c - clear the histogram data\r\n");
- printf(" f - change the selection coefficient\r\n");
- printf(" h - print this help message\r\n");
- printf(" m - change the mutation rate\r\n");
- printf(" p - pause until a key is pressed\r\n");
- printf(" q - quit the program\r\n");
- printf(" s - toggle selection on/off\r\n");
- printf(" t - change the target phrase\r\n");
- printf(" v - change the number of survivors per generation\r\n");
- printf("\r\nHit any key to continue\r\n");
- while(!kbhit()){
- ;
- }
- (void)getch(); // consume character
- break;
- case 'm': // change the mutation rate
- printf("Enter new mutation rate: ");
- fflush(stdout);
- get_double(&mutation_rate);
- break;
- case 'p': // pause until next keystroke
- printf("Paused: Hit any key to continue\r\n");
- while(!kbhit()){
- ;
- }
- (void)getch(); // consume character
- break;
- case 'q': // quit the program
- exit(0);
- case 's': // toggle selection on and off
- selection_enabled = !selection_enabled;
- break;
- case 't': // set new target phrase
- printf("Enter new target phrase (spaces and letters only): ");
- fflush(stdout);
- i = 0;
- do {
- while (!kbhit()) { // wait for keystroke
- ;
- }
- c = toupper(getch());
- if (c == '\r') break;
- if (isalpha(c) || c == ' ') {
- putchar(c);
- fflush(stdout);
- ascii_target[i++] = c; // add character to target phrase
- }
- } while (ascii_target[i-1] != '\r' && i-1 < GENOME_LEN);
- for(; i<GENOME_LEN; i++) {
- ascii_target[i] = ' '; // pad remainder of target with spaces
- }
- weaselize(ascii_target, &target);
- // need to recompute fitness values because the target has changed
- for (i=0; i<POPULATION_SIZE; i++) {
- genome_array[i].fitness = fitness(&genome_array[i]);
- }
- break;
- case 'v': // change the number of survivors per generation
- printf("Enter desired number of survivors: ");
- fflush(stdout);
- get_double(&floating_num_survivors);
- num_survivors = (int)floating_num_survivors;
- break;
- }
- }
- }
- printf("\n\n\rFinal generation = %lld fitness = %1.2le Hamming distance = %d\n\r", generation, genome_array[0].fitness, GENOME_LEN-genome_array[0].fitness);
- printf("\n\rFinal genome:\n\n\r");
- display_genome(&genome_array[0]);
- printf("\n\n\r");
- #ifndef WINDOWS
- tcsetattr(0, TCSANOW, &orig_termios);
- #endif
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement