nolog1n

exam_selector_cpp

Jan 4th, 2019
48
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <iostream>
  2. #include <fstream>
  3. #include <vector>
  4. #include <sstream>
  5. #include <stdexcept>
  6. #include <random>
  7. #include <unordered_map>
  8. #include <unordered_set>
  9. using namespace std;
  10.  
  // Prints the expected command-line invocation to standard output.
  void usage() {
      static const char kUsageText[] = "Usage: program VOCAB_FILE N";
      std::cout << kUsageText << std::endl;
  }
  14.  
  // Lowercase Cyrillic letters from "а" to "я"; the literal leaves out
  // ё, й, ъ, ы and ь. Nothing in the visible code reads this variable.
  // NOTE(review): presumably the omitted letters are ones that (almost) never
  // start a vocabulary entry — confirm before relying on this list.
  std::string rusLetters("абвгдежзиклмнопрстуфхцчшщэюя");
  16.  
  // Reads the vocabulary file line by line and returns its non-empty lines
  // in file order.
  //
  // A single trailing '\r' is stripped from each line so that files saved
  // with CRLF line endings produce the same entries as LF files; lines that
  // are empty after stripping are skipped.
  //
  // Throws std::runtime_error if the file cannot be opened, instead of
  // silently returning an empty vocabulary.
  std::vector<std::string> readVocab(std::string vocabFilename) {
      std::ifstream infile(vocabFilename);
      if (!infile)
          throw std::runtime_error("Cannot open vocabulary file: " + vocabFilename);

      std::vector<std::string> vocab;
      std::string buf;
      while (std::getline(infile, buf)) {
          // getline only removes '\n'; drop the '\r' left behind by CRLF files.
          if (!buf.empty() && buf.back() == '\r')
              buf.pop_back();
          if (!buf.empty())
              vocab.push_back(buf);
      }
      return vocab;
  }
  27.  
  // Parses `str` as a decimal int.
  //
  // Leading and trailing whitespace is tolerated, but the whole remaining text
  // must be the number: input such as "12abc" is rejected rather than being
  // read as 12.
  //
  // Throws std::runtime_error("Expected integer, got <str>") when `str` does
  // not contain exactly one integer that stream extraction accepts.
  int parseInt(const std::string& str) {
      std::istringstream istr(str);
      int x = 0;
      char trailing = '\0';
      // The second extraction skips whitespace and succeeds only if some
      // non-whitespace character is left after the number.
      if (!(istr >> x) || (istr >> trailing))
          throw std::runtime_error("Expected integer, got " + str);
      return x;
  }
  36.  
  // Returns how many bytes the UTF-8 sequence starting with `first` occupies
  // (1 to 4), judged from the bit pattern of that lead byte.
  //
  // Throws std::runtime_error("Invalid encoding") when `first` cannot start a
  // sequence: continuation bytes (10xxxxxx) and bytes 0xF8-0xFF. Overlong or
  // out-of-range lead bytes (0xC0, 0xC1, 0xF5-0xF7) are not rejected here.
  std::size_t characterLength(char first) {
      // Work on the unsigned value so bytes >= 0x80 are not seen as negative.
      const unsigned char c = static_cast<unsigned char>(first);
      if ((c & 0x80u) == 0x00u)  // 0xxxxxxx: single-byte (ASCII)
          return 1;
      if ((c & 0xE0u) == 0xC0u)  // 110xxxxx
          return 2;
      if ((c & 0xF0u) == 0xE0u)  // 1110xxxx
          return 3;
      if ((c & 0xF8u) == 0xF0u)  // 11110xxx
          return 4;
      throw std::runtime_error("Invalid encoding");
  }
  48.  
  49. string firstLetter(const string& str) {
  50. size_t length = characterLength(str.at(0));
  51. string letter = str.substr(0, length);
  52. if(letter.size() != length)
  53. throw runtime_error("Invalid encoding");
  54. return letter;
  55. }
  56.  
  // Writes one "<letter> <weight>" line to standard output per entry of
  // `weights`, followed by a blank line.
  //
  // `alphabete` is indexed in step with `weights` and is not bounds-checked,
  // so it must hold at least weights.size() entries.
  void print_weights(const std::vector<std::string>& alphabete, const std::vector<double>& weights) {
      std::size_t position = 0;
      for (const double weight : weights) {
          std::cout << alphabete[position] << ' ' << weight << std::endl;
          ++position;
      }
      std::cout << std::endl;
  }
  62.  
  // Scales `weights` in place so that its elements sum to 1.
  //
  // When the elements sum to exactly 0 (for example an all-zero vector) the
  // weights are left untouched: dividing would turn every element into NaN.
  // An empty vector is likewise left as is.
  void normalize(std::vector<double>& weights) {
      double sum = 0;
      for (const double w : weights)
          sum += w;

      // Exact comparison is intended: only a true zero divisor is guarded.
      if (sum == 0)
          return;

      for (double& w : weights)
          w /= sum;
  }
  71.  
  72. vector<string> getLetters(int nLetters,
  73. double alpha,
  74. const vector<string>& alphabete,
  75. const vector<string>& vocab) {
  76. vector<double> weights(alphabete.size());
  77. std::fill(weights.begin(), weights.end(), alpha);
  78.  
  79. unordered_map<string, size_t> letter2id;
  80.  
  81. for(size_t i = 0; i < alphabete.size(); ++i)
  82. letter2id[alphabete[i]] = i;
  83.  
  84. for(const string& definition: vocab) {
  85. string letter = firstLetter(definition);
  86.  
  87. weights[letter2id.at(letter)] += 1;
  88. }
  89.  
  90. normalize(weights);
  91. print_weights(alphabete, weights);
  92.  
  93. std::random_device rd;
  94. std::mt19937 gen(rd());
  95. std::discrete_distribution<size_t> ds(weights.begin(), weights.end());
  96.  
  97. vector<string> result;
  98. for(int i = 0; i < nLetters; ++i) {
  99. size_t idx = ds(gen);
  100. result.push_back(alphabete.at(idx));
  101. }
  102.  
  103. return result;
  104. }
  105.  
  106. vector<string> inferAlphabete(const vector<string>& vocab) {
  107. std::unordered_set<string> unique_letters;
  108. for(const string& def : vocab)
  109. unique_letters.insert(firstLetter(def));
  110.  
  111. return std::vector<string>(unique_letters.begin(), unique_letters.end());
  112. }
  113.  
  114. int main(int argc, char* argv[])
  115. {
  116. if(argc < 3) {
  117. usage();
  118. return 0;
  119. }
  120.  
  121. string vocabFilename = argv[1];
  122. vector<string> vocab = readVocab(vocabFilename);
  123.  
  124. int nLetters = parseInt(argv[2]);
  125.  
  126. for(const string& letter : getLetters(nLetters, 0, inferAlphabete(vocab), vocab)) {
  127. cout << letter << " ";
  128. }
  129.  
  130. cout << endl;
  131.  
  132. return 0;
  133. }
Add Comment
Please, Sign In to add comment