Petrovi4

ExploreKeyWords

Aug 22nd, 2022 (edited)
938
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.06 KB | None | 0 0
  1. #include <functional>
  2. #include <future>
  3. #include <iostream>
  4. #include <iterator>
  5. #include <map>
  6. #include <set>
  7. #include <string>
  8. #include <string_view>
  9. #include <vector>
  10.  
  11. using namespace std;
  12.  
  /// Per-keyword occurrence counters that can be merged with `+=`.
  struct Stats {
      /// Maps a keyword to the number of times it has been counted.
      std::map<std::string, int> word_frequences;

      /// Adds every counter of `other` to the counter with the same key in this
      /// object; keys that are missing here are created (starting from zero).
      ///
      /// @param other statistics to merge into this object.
      /// @return a reference to this object, so merges can be chained or used
      ///         inside larger expressions like the built-in `+=`.
      Stats& operator+=(const Stats& other) {
          for (const auto& [word, frequency] : other.word_frequences) {
              word_frequences[word] += frequency;
          }
          return *this;
      }
  };
  22.  
  /// Ordered set of keywords. The transparent comparator `std::less<>` enables
  /// lookups by `std::string_view` without building a temporary `std::string`.
  using KeyWords = std::set<std::string, std::less<>>;
  24.  
  /// Splits `str` into words separated by the space character (' ').
  ///
  /// Consecutive, leading and trailing spaces produce no empty words. Only ' '
  /// is treated as a separator; other whitespace stays inside the words.
  ///
  /// @param str text to split.
  /// @return views into `str`, one per word, in order of appearance. The views
  ///         refer to the characters behind `str` and do not own them.
  std::vector<std::string_view> Split(std::string_view str) {
      constexpr char kDelimiter = ' ';

      std::vector<std::string_view> words;
      auto word_begin = str.find_first_not_of(kDelimiter);
      while (word_begin != std::string_view::npos) {
          const auto word_end = str.find(kDelimiter, word_begin);
          if (word_end == std::string_view::npos) {
              // The last word runs to the end of the input.
              words.push_back(str.substr(word_begin));
              break;
          }
          words.push_back(str.substr(word_begin, word_end - word_begin));
          // Skip the whole run of delimiters before the next word.
          word_begin = str.find_first_not_of(kDelimiter, word_end);
      }
      return words;
  }
  40.  
  41. Stats ExploreLine(const KeyWords& key_words, string_view line) {
  42.     Stats result;
  43.     for (const string_view word : Split(line)) {
  44.         if (key_words.count(word) > 0) {
  45.             ++result.word_frequences[string(word)];
  46.         }
  47.     }
  48.     return result;
  49. }
  50.  
  51. Stats ExploreBatch(const KeyWords& key_words, vector<string> lines) {
  52.     Stats result;
  53.     for (const string& line : lines) {
  54.         result += ExploreLine(key_words, line);
  55.     }
  56.     return result;
  57. }
  58.  
  59. Stats ExploreKeyWords(const KeyWords& key_words, istream& input) {
  60.     const size_t MAX_BATCH_SIZE = 5000;
  61.  
  62.     vector<string> batch;
  63.     batch.reserve(MAX_BATCH_SIZE);
  64.  
  65.     vector<future<Stats>> futures;
  66.  
  67.     for (string line; getline(input, line);) {
  68.         batch.push_back(move(line));
  69.         if (batch.size() >= MAX_BATCH_SIZE) {
  70.             futures.push_back(
  71.                 async(ExploreBatch, ref(key_words), move(batch))
  72.             );
  73.             batch.reserve(MAX_BATCH_SIZE);
  74.         }
  75.     }
  76.  
  77.     Stats result;
  78.  
  79.     if (!batch.empty()) {
  80.         result += ExploreBatch(key_words, move(batch));
  81.     }
  82.  
  83.     for (auto& f : futures) {
  84.         result += f.get();
  85.     }
  86.  
  87.     return result;
  88. }
Add Comment
Please, Sign In to add comment