Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <iomanip>
- #include <fstream>
- #include <cstdint>
- #include <vector>
- #include <string>
- #include <unordered_map>
- #include <algorithm>
- #include <random>
- enum {
- survivors_per_generation = 125, // We kill all but this many candidates per generation
- leader_board_count = 25, // Show this many scores in the scoreboard each generation
- population_per_generation = 500, // Create this many individuals in a generation
- num_bits = 14, // Bits selected from md5sum's
- similarity_threshold = 90 // Controls how far mutations go
- };
- // Globals affecting sum scoring.
- std::vector<unsigned> bits; // Bits to select for Sum's value
- unsigned int state=0; // Seed to indicate need to recache value
- // Each Sum is an md5sum.
- struct Sum {
- // Initialize from data file
- Sum(const std::string& s) : laststate(), lastcalc()
- {
- data[0]=data[1]=0;
- if (s.size()<128) throw 1;
- for (unsigned i=0; i<128; ++i) {
- if (s[i]=='1') {
- data[i/64]|=(1ULL)<<(i%64);
- }
- }
- }
- uint64_t data[2];
- mutable unsigned int laststate;
- mutable unsigned int lastcalc; // current value iff state==laststate
- // Computes the "hash of the day"; an unsigned long formed by culling
- // all but the bits mentioned in "bits" within our md5sum.
- unsigned long value() const {
- if (state==laststate) { return lastcalc; }
- laststate=state;
- unsigned int m=1;
- unsigned int rv=0;
- for (unsigned int i=0, e=bits.size(); i<e; ++i) {
- unsigned bi=bits[i];
- if ((1ULL<<(bi%64))&data[bi/64]) rv|=m;
- m<<=1;
- }
- return lastcalc=rv;
- }
- };
- // The set of md5sum's for all of the color values, in the original order
- std::vector<Sum> words;
- // Calculates "duplication" score, which is the selection force.
- unsigned dupcalc(std::vector<unsigned> b) {
- bits=b; ++state;
- unsigned maxbucket=0;
- unsigned rawcount=0;
- std::unordered_map<unsigned,unsigned> valueset;
- for (unsigned i=0, e=words.size(); i<e; ++i) {
- unsigned thisbucket=++valueset[words[i].value()];
- if (thisbucket>1) rawcount++;
- if (thisbucket>maxbucket) maxbucket=thisbucket;
- }
- return maxbucket*1000+rawcount;
- }
- // A sequence of num_bits bits to pick out of the md5sum. These are the
- // individuals in the population.
- struct Candidate {
- Candidate() :score() { }
- Candidate(const Candidate& rhs) : bitset(rhs.bitset) , score() { }
- unsigned int value() const { if (!score) score=dupcalc(bitset); return score; }
- void normalize() { std::sort(bitset.begin(), bitset.end()); }
- std::vector<unsigned int> bitset;
- mutable unsigned int score;
- bool operator<(const Candidate& rhs) const {
- return value()<rhs.value();
- }
- };
- // The population
- std::vector<Candidate> population;
- // RNG. We don't seed this... this program is about finding values, not lucking
- // into them. Reproduceability is a good thing here.
- std::mt19937 generator;
- unsigned int generation=0; // For the scoreboard
- // Breeds a generation
- void breed() {
- ++generation;
- population.resize(survivors_per_generation);
- for (unsigned int j=0, je=population_per_generation; j<je; ++j) {
- unsigned int i=generator()%survivors_per_generation;
- // Bias towards best scores
- if (i) i=generator()%i;
- if (i) i=generator()%i;
- // Using this parent, spawn a child
- Candidate& parent = population[i];
- Candidate child(parent); // start with a clone
- // Mutate it
- for(;;) {
- child.bitset[generator()%child.bitset.size()]=(generator()%128);
- while ((generator()%100)>similarity_threshold) {
- child.bitset[generator()%child.bitset.size()]=(generator()%128);
- }
- if (child.value()==parent.value()) {
- if (child.bitset==parent.bitset) {
- continue;
- }
- }
- break;
- }
- child.normalize();
- population.push_back(child);
- }
- // Sort (by score)
- std::sort(population.begin(), population.end());
- }
- // Initially populates generation 0.
- void populate() {
- for (unsigned int i=population.size(); i<survivors_per_generation; ++i) {
- Candidate initial;
- for (unsigned int j=0; j<num_bits; ++j) { initial.bitset.emplace_back(generator()%128); }
- initial.normalize();
- population.push_back(initial);
- }
- std::cout << population.size() << " <- populated" << std::endl;
- std::sort(population.begin(), population.end());
- }
- // Show what's happening
- void scoreboard() {
- std::cout << "scoreboard: generation " << generation << std::endl;
- for (unsigned int i=0; i<leader_board_count; ++i) {
- unsigned int v = population[i].value();
- std::cout << "Candidate " << std::setw(2) << i << " {" << std::setw(3) << v << "}:" <<std::flush;
- for (unsigned int j=0; j<num_bits; ++j) {
- std::cout << " " << std::setw(3) << population[i].bitset[j] << std::flush;
- }
- std::cout << "\n";
- }
- }
- int main() {
- // Load words.
- // Each line of data is, in order, the md5sum of a color as a binary string.
- std::ifstream in("data");
- std::string line;
- while (std::getline(in, line)) { words.emplace_back(line); }
- std::cout << words.size() << std::endl;
- population.resize(1);
- population.back().bitset = std::vector<unsigned>({0, 8, 16, 31, 36, 39, 55, 75, 78, 80, 84, 85,108,125});
- // Populate generation 0
- populate();
- scoreboard();
- // Main loop
- for(;;) { breed(); scoreboard(); }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement