Advertisement
Guest User

Untitled

a guest
Dec 14th, 2019
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.31 KB | None | 0 0
  1. #include <iostream>
  2. #include <unordered_map>
  3. #include <fstream>
  4. #include <cmath>
  5. #include <sstream>
  6. #include <random>
  7. #include <algorithm>
  8. #include <chrono>
  9.  
  // Shared pseudo-random engine. It is seeded with a fixed literal, so the
  // engine produces the same stream of values on every run.
  std::mt19937 rng{6274674};

  // Uniform draw from the closed range [0, 25]; Decode uses the values as
  // indices into its 26-entry letter permutation.
  std::uniform_int_distribution<std::mt19937::result_type> dist{0, 25};

  // Table keyed by an n-gram string with a floating-point weight per entry.
  using Map = std::unordered_map<std::string, double>;
  14.  
  15. inline Map ReadNgrams(const std::string& filename) {
  16.     std::fstream input_file(filename);
  17.     std::string line;
  18.     Map map;
  19.     map["1"] = 1.0;
  20.     map.clear();
  21.     while(getline(input_file,line))
  22.     {
  23.        std::stringstream line_stream;
  24.        line_stream << line;
  25.        std::string quadra;
  26.        line_stream >> quadra;
  27.        std::string count;
  28.        line_stream >> count;
  29.        double log_prob = log(strtol(count.c_str(), nullptr, 10));
  30.        map[quadra] = log_prob;
  31.     }
  32.     return map;
  33. }
  34.  
  /// Reads a whole text file into a single string.
  ///
  /// Lines are appended back to back: the terminator that std::getline consumes
  /// is not put back, so the result contains no '\n' characters.
  ///
  /// @param filename  Path of the file to read.
  /// @return The concatenated lines; empty when the file cannot be opened
  ///         (no error is reported in that case).
  inline std::string ReadText(const std::string& filename) {
      // Input-only stream: std::fstream's default mode also asks for write
      // access, which makes opening a read-only file fail.
      std::ifstream input_file(filename);

      std::string res;
      std::string line;
      while (std::getline(input_file, line)) {
          res += line;
      }
      return res;
  }
  44.  
  /// Applies a letter substitution to one character.
  ///
  /// @param c            Character to translate.
  /// @param permutation  Substitution table. Entry k (0 for 'a'/'A' up to 25 for
  ///                     'z'/'Z') is the alphabet index of the replacement
  ///                     letter. It must hold at least 26 entries; this is not
  ///                     checked.
  /// @return The replacement letter in the same case as `c`, or `c` unchanged
  ///         when it is not an ASCII letter.
  inline char GetChar(char c, const std::vector<int>& permutation) {
      // Plain range checks instead of isalpha()/isupper(): handing those
      // functions a negative char (any byte >= 0x80 where char is signed) is
      // undefined behaviour, and their answer depends on the current locale,
      // which could classify a byte as a letter that has no slot in the table.
      if (c >= 'a' && c <= 'z') {
          return static_cast<char>('a' + permutation[c - 'a']);
      }
      if (c >= 'A' && c <= 'Z') {
          return static_cast<char>('A' + permutation[c - 'A']);
      }
      return c;
  }
  57.  
  58. inline double GetScore(const std::string& text, const Map& map, const std::vector<int>& permutation) {
  59.     double res = 0.0;
  60.     for (size_t i = 0; i < text.size() - 3; ++i) {
  61.         std::string quad;
  62.         quad += GetChar(text[i], permutation);
  63.         quad += GetChar(text[i + 1], permutation);
  64.         quad += GetChar(text[i + 2], permutation);
  65.         quad += GetChar(text[i + 3], permutation);
  66.         if (map.find(quad) != map.end()) {
  67.             res += map.at(quad);
  68.         }
  69.     }
  70.     return res;
  71. }
  72.  
  73. inline std::string Recode(const std::string& text, const std::vector<int>& permutation) {
  74.     std::string res;
  75.     for (auto elem : text) {
  76.         res += GetChar(elem, permutation);
  77.     }
  78.     return res;
  79. }
  80.  
  81. inline std::string Decode(const std::string& text, const Map& dict) {
  82.     std::string new_text;
  83.     for (auto elem : text) {
  84.         if (isalpha(elem)) {
  85.             new_text += toupper(elem);
  86.         }
  87.     }
  88.     std::vector<int> permutation;
  89.     permutation.reserve(26);
  90.     for (int i = 0; i < 26; ++i) {
  91.         permutation.push_back(i);
  92.     }
  93.     int count = 0;
  94.     double score = GetScore(new_text, dict, permutation);
  95.     while (count <= 10'000) {
  96.        int i = 0, j = 0;
  97.        while (i == j) {
  98.            i = dist(rng);
  99.            j = dist(rng);
  100.        }
  101.        auto new_permutation = permutation;
  102.        std::swap(new_permutation[i], new_permutation[j]);
  103.        double new_score = GetScore(new_text, dict, new_permutation);
  104.        if (new_score > score) {
  105.            permutation = new_permutation;
  106.            score = new_score;
  107.        }
  108.        ++count;
  109.    }
  110.    /*for (size_t i = 0; i < permutation.size(); ++i) {
  111.        std::cout << char('a' + i) << " " << char('a' + permutation[i]) << std::endl;
  112.    }*/
  113.    return Recode(text, permutation);
  114. }
  115.  
  116. int main() {
  117.    auto t = clock();
  118.    auto text = ReadText("C:\\Users\\Admin\\CLionProjects\\decoder\\first.txt");
  119.  
  120.    auto dict = ReadNgrams("C:\\Users\\Admin\\CLionProjects\\decoder\\english_quadgrams.txt");
  121.  
  122.    std::string res = Decode(text, dict);
  123.    std::cout << clock() - t << std::endl;
  124.  
  125.    /*auto real_text = ReadText("C:\\Users\\Admin\\CLionProjects\\decoder\\third_real.txt");
  126.  
  127.    int count = 0;
  128.    std::cout << std::boolalpha << (real_text.size() == res.size()) << std::endl;
  129.    for (int i = 0; i < real_text.size(); ++i) {
  130.        if (real_text[i] != res[i]) {
  131.            std::cout << i << " " << real_text[i] << " " << res[i] << std::endl;
  132.            ++count;
  133.        }
  134.    }
  135.    std::cout << count << std::endl;*/
  136.  
  137.  
  138.    /*std::vector<int> p;
  139.    p.reserve(26);
  140.    for (int i = 0; i < 26; ++i) {
  141.        p.push_back(i);
  142.    }
  143.    std::random_shuffle(p.begin(), p.end());
  144.    std::string text;
  145.    for (auto elem : real_text) {
  146.        text += GetChar(elem, p);
  147.    }
  148.    std::cout << text;
  149.  
  150.    std::ofstream fout("C:\\Users\\Admin\\CLionProjects\\decoder\\fourth.txt");
  151.    fout << text;*/
  152. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement