Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- std::vector<double> get_associations(const std::vector<Page>& pages,
- const std::vector<Word>& words,
- const std::vector<std::string>& keywords,
- const bool normalizable) {
- std::map<int, int> pid_conv;
- for(int i = 0; i < static_cast<int>(pages.size()); ++i) {
- pid_conv[pages[i].id()] = i;
- }
- std::map<std::string, int> word_conv;
- for(int i = 0; i < static_cast<int>(words.size()); ++i) {
- word_conv[words[i].str()] = i;
- }
- std::vector<double> associations(pages.size(), 0.0);
- for(const std::string& keyword : keywords) {
- if(!word_conv.count(keyword)) continue;
- int widx = word_conv[keyword];
- std::set<int> pid_set;
- for(const auto& location : words[widx].locations()) {
- pid_set.insert(location.first);
- }
- for(int pid : pid_set) {
- if(pid_conv.count(pid)) associations[pid_conv[pid]] += 1.0;
- }
- }
- if(normalizable) {
- double max_association = *std::max_element(associations.begin(), associations.end());
- if(max_association != 0.0) {
- for(int i = 0; i < static_cast<int>(pages.size()); ++i) {
- associations[i] /= max_association;
- }
- }
- }
- return associations;
- }
Add Comment
Please, Sign In to add comment