daily pastebin goal
53%
SHARE
TWEET

Untitled

a guest Jun 14th, 2018 66 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. std::vector<double> get_associations(const std::vector<Page>& pages,
  2.                      const std::vector<Word>& words,
  3.                      const std::vector<std::string>& keywords,
  4.                      const bool normalizable) {
  5.   std::map<int, int> pid_conv;
  6.   for(int i = 0; i < static_cast<int>(pages.size()); ++i) {
  7.     pid_conv[pages[i].id()] = i;
  8.   }
  9.  
  10.   std::map<std::string, int> word_conv;
  11.   for(int i = 0; i < static_cast<int>(words.size()); ++i) {
  12.     word_conv[words[i].str()] = i;
  13.   }
  14.  
  15.   std::vector<double> associations(pages.size(), 0.0);
  16.   for(const std::string& keyword : keywords) {
  17.     if(!word_conv.count(keyword)) continue;
  18.     int widx = word_conv[keyword];
  19.     std::set<int> pid_set;
  20.     for(const auto& location : words[widx].locations()) {
  21.       pid_set.insert(location.first);
  22.     }
  23.     for(int pid : pid_set) {
  24.       if(pid_conv.count(pid)) associations[pid_conv[pid]] += 1.0;
  25.     }
  26.   }
  27.  
  28.   if(normalizable) {
  29.     double max_association = *std::max_element(associations.begin(), associations.end());
  30.     if(max_association != 0.0) {
  31.       for(int i = 0; i < static_cast<int>(pages.size()); ++i) {
  32.     associations[i] /= max_association;
  33.       }
  34.     }
  35.   }
  36.  
  37.   return associations;
  38. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top