Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <algorithm>
#include <cctype>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <numeric>
#include <random>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// Program-wide Mersenne Twister engine. The seed is a fixed constant, so every
// run of the program draws the same sequence of numbers.
std::mt19937 rng{6274674};

// Uniform integer distribution over the closed range [0, 25].
std::uniform_int_distribution<std::mt19937::result_type> dist{0, 25};

// Hash table associating a string key with a floating-point value.
using Map = std::unordered_map<std::string, double>;
- inline Map ReadNgrams(const std::string& filename) {
- std::fstream input_file(filename);
- std::string line;
- Map map;
- map["1"] = 1.0;
- map.clear();
- while(getline(input_file,line))
- {
- std::stringstream line_stream;
- line_stream << line;
- std::string quadra;
- line_stream >> quadra;
- std::string count;
- line_stream >> count;
- double log_prob = log(strtol(count.c_str(), nullptr, 10));
- map[quadra] = log_prob;
- }
- return map;
- }
/// Reads a text file into a single string.
///
/// The file is consumed line by line and the lines are appended to each other
/// directly, so the newline characters separating them do not appear in the
/// result.
///
/// @param filename Path of the file to read.
/// @return Concatenated lines of the file; empty if the file cannot be opened.
inline std::string ReadText(const std::string& filename) {
    // Input-only stream: std::fstream defaults to in|out and therefore fails
    // to open files the process is not allowed to write.
    std::ifstream input_file(filename);

    std::string res;
    std::string line;
    while (std::getline(input_file, line)) {
        res += line;
    }
    return res;
}
/// Substitutes one character through a letter permutation.
///
/// ASCII letters are replaced by the letter whose alphabet index is
/// permutation[k], where k is the index of the input letter (0 for 'a'/'A'
/// up to 25 for 'z'/'Z'). Case is preserved. Any other character is returned
/// unchanged.
///
/// @param c           Character to translate.
/// @param permutation Table indexed by alphabet position; it must have at
///                    least 26 entries, each expected to be in [0, 25].
/// @return The substituted letter, or `c` itself when it is not an ASCII letter.
inline char GetChar(char c, const std::vector<int>& permutation) {
    // Explicit range checks instead of isalpha/isupper: those functions have
    // undefined behaviour for negative char values (non-ASCII bytes) and are
    // locale-dependent, which could produce an index outside the table.
    if (c >= 'a' && c <= 'z') {
        return static_cast<char>('a' + permutation[c - 'a']);
    }
    if (c >= 'A' && c <= 'Z') {
        return static_cast<char>('A' + permutation[c - 'A']);
    }
    return c;
}
- inline double GetScore(const std::string& text, const Map& map, const std::vector<int>& permutation) {
- double res = 0.0;
- for (size_t i = 0; i < text.size() - 3; ++i) {
- std::string quad;
- quad += GetChar(text[i], permutation);
- quad += GetChar(text[i + 1], permutation);
- quad += GetChar(text[i + 2], permutation);
- quad += GetChar(text[i + 3], permutation);
- if (map.find(quad) != map.end()) {
- res += map.at(quad);
- }
- }
- return res;
- }
- inline std::string Recode(const std::string& text, const std::vector<int>& permutation) {
- std::string res;
- for (auto elem : text) {
- res += GetChar(elem, permutation);
- }
- return res;
- }
- inline std::string Decode(const std::string& text, const Map& dict) {
- std::string new_text;
- for (auto elem : text) {
- if (isalpha(elem)) {
- new_text += toupper(elem);
- }
- }
- std::vector<int> permutation;
- permutation.reserve(26);
- for (int i = 0; i < 26; ++i) {
- permutation.push_back(i);
- }
- int count = 0;
- double score = GetScore(new_text, dict, permutation);
- while (count <= 10'000) {
- int i = 0, j = 0;
- while (i == j) {
- i = dist(rng);
- j = dist(rng);
- }
- auto new_permutation = permutation;
- std::swap(new_permutation[i], new_permutation[j]);
- double new_score = GetScore(new_text, dict, new_permutation);
- if (new_score > score) {
- permutation = new_permutation;
- score = new_score;
- }
- ++count;
- }
- /*for (size_t i = 0; i < permutation.size(); ++i) {
- std::cout << char('a' + i) << " " << char('a' + permutation[i]) << std::endl;
- }*/
- return Recode(text, permutation);
- }
- int main() {
- auto t = clock();
- auto text = ReadText("C:\\Users\\Admin\\CLionProjects\\decoder\\first.txt");
- auto dict = ReadNgrams("C:\\Users\\Admin\\CLionProjects\\decoder\\english_quadgrams.txt");
- std::string res = Decode(text, dict);
- std::cout << clock() - t << std::endl;
- /*auto real_text = ReadText("C:\\Users\\Admin\\CLionProjects\\decoder\\third_real.txt");
- int count = 0;
- std::cout << std::boolalpha << (real_text.size() == res.size()) << std::endl;
- for (int i = 0; i < real_text.size(); ++i) {
- if (real_text[i] != res[i]) {
- std::cout << i << " " << real_text[i] << " " << res[i] << std::endl;
- ++count;
- }
- }
- std::cout << count << std::endl;*/
- /*std::vector<int> p;
- p.reserve(26);
- for (int i = 0; i < 26; ++i) {
- p.push_back(i);
- }
- std::random_shuffle(p.begin(), p.end());
- std::string text;
- for (auto elem : real_text) {
- text += GetChar(elem, p);
- }
- std::cout << text;
- std::ofstream fout("C:\\Users\\Admin\\CLionProjects\\decoder\\fourth.txt");
- fout << text;*/
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement