Advertisement
force1987

search_server.h

Mar 3rd, 2023
718
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.63 KB | None | 0 0
  #pragma once

  #include <algorithm>
  #include <cmath>
  #include <iostream>
  #include <map>
  #include <set>
  #include <stdexcept>
  #include <string>
  #include <tuple>
  #include <vector>

  #include "string_processing.h"
  #include "document.h"
  9.  
  // Enables the `"..."s` std::string literal used by the code in this header.
  // Note: as a header-scope using-directive it is visible to every includer.
  using namespace std::string_literals;

  /// Upper bound on the number of documents returned by FindTopDocuments.
  inline constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;

  /// Two relevance values closer than this are treated as equal when ranking.
  inline constexpr double ACCURACY = 1e-6;
  13.  
  14. class SearchServer {
  15. public:
  16.     template <typename StringContainer>
  17.     explicit SearchServer(const StringContainer& stop_words);
  18.     explicit SearchServer(const std::string& stop_words_text);
  19.  
  20.     void AddDocument(int document_id, const std::string& document, DocumentStatus status,
  21.         const std::vector<int>& ratings);
  22.  
  23.     template <typename DocumentPredicate>
  24.     std::vector<Document> FindTopDocuments(const std::string& raw_query,
  25.         DocumentPredicate document_predicate) const;
  26.  
  27.     std::vector<Document> FindTopDocuments(const std::string& raw_query, DocumentStatus status) const;
  28.  
  29.     std::vector<Document> FindTopDocuments(const std::string& raw_query) const;
  30.  
  31.     int GetDocumentCount() const;
  32.  
  33.     std::vector<int>::const_iterator begin() const;
  34.     std::vector<int>::const_iterator end() const;
  35.  
  36.  
  37.     std::tuple<std::vector<std::string>, DocumentStatus> MatchDocument(const std::string& raw_query,
  38.         int document_id) const;
  39.  
  40.     const std::map<std::string, double>& GetWordFrequencies(int document_id) const;
  41.     void RemoveDocument(int document_id);
  42.     std::vector<int> duplicates;
  43.  
  44. private:
  45.     struct DocumentData {
  46.         int rating;
  47.         DocumentStatus status;
  48.     };
  49.     const std::set<std::string> stop_words_;
  50.     std::map<int, std::map<std::string, double>> word_freqs_in_document;
  51.     std::map<int, DocumentData> documents_;
  52.     std::vector<int> document_ids_;
  53.    
  54.  
  55.     bool IsStopWord(const std::string& word) const;
  56.  
  57.     static bool IsValidWord(const std::string& word);
  58.  
  59.     std::vector<std::string> SplitIntoWordsNoStop(const std::string& text) const;
  60.  
  61.     static int ComputeAverageRating(const std::vector<int>& ratings);
  62.  
  63.     struct QueryWord {
  64.         std::string data;
  65.         bool is_minus;
  66.         bool is_stop;
  67.     };
  68.  
  69.     QueryWord ParseQueryWord(const std::string& text) const;
  70.  
  71.     struct Query {
  72.         std::set<std::string> plus_words;
  73.         std::set<std::string> minus_words;
  74.     };
  75.  
  76.     Query ParseQuery(const std::string& text) const;
  77.  
  78.     double ComputeWordInverseDocumentFreq(const std::string& word) const;
  79.  
  80.     template <typename DocumentPredicate>
  81.     std::vector<Document> FindAllDocuments(const Query& query,
  82.         DocumentPredicate document_predicate) const;
  83. };
  84.  
  85. template <typename StringContainer>
  86. SearchServer::SearchServer(const StringContainer& stop_words)
  87.     : stop_words_(MakeUniqueNonEmptyStrings(stop_words))
  88. {
  89.     if (!std::all_of(stop_words_.begin(), stop_words_.end(), IsValidWord)) {
  90.         throw std::invalid_argument("Some of stop words are invalid"s);
  91.     }
  92. }
  93.  
  94. template <typename DocumentPredicate>
  95. std::vector<Document> SearchServer::FindTopDocuments(const std::string& raw_query,
  96.     DocumentPredicate document_predicate) const {
  97.     const auto query = ParseQuery(raw_query);
  98.  
  99.     auto matched_documents = FindAllDocuments(query, document_predicate);
  100.  
  101.     std::sort(matched_documents.begin(), matched_documents.end(),
  102.         [](const Document& lhs, const Document& rhs) {
  103.             if (std::abs(lhs.relevance - rhs.relevance) < ACCURACY) {
  104.                 return lhs.rating > rhs.rating;
  105.             }
  106.             else {
  107.                 return lhs.relevance > rhs.relevance;
  108.             }
  109.         });
  110.     if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
  111.         matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
  112.     }
  113.     return matched_documents;
  114. }
  115.  
  116. template <typename DocumentPredicate>
  117. std::vector<Document> SearchServer::FindAllDocuments(const Query& query,
  118.     DocumentPredicate document_predicate) const {
  119.     std::map<int, double> document_to_relevance;
  120.     for (const std::string& word : query.plus_words) {
  121.         bool flag = 0;
  122.         for (const auto [id, doc] : word_freqs_in_document) {
  123.             if (doc.count(word) > 0) {
  124.                 flag = 1;
  125.                 break;
  126.             }
  127.         }
  128.         if (flag == 0) {
  129.             continue;
  130.         }
  131.         const double inverse_document_freq = ComputeWordInverseDocumentFreq(word);
  132.         for (const auto& [document_id, words] : word_freqs_in_document) {
  133.             if (words.count(word) == 0)
  134.                 continue;
  135.             const auto& document_data = documents_.at(document_id);
  136.             if (document_predicate(document_id, document_data.status, document_data.rating)) {
  137.                 document_to_relevance[document_id] += words.at(word) * inverse_document_freq;
  138.             }
  139.         }
  140.     }
  141.  
  142.     for (const std::string& word : query.minus_words) {
  143.         bool flag = 0;
  144.         for (const auto [id, doc] : word_freqs_in_document) {
  145.             if (doc.count(word) > 0) {
  146.                 flag = 1;
  147.                 break;
  148.             }
  149.         }
  150.         if (flag == 0) {
  151.             continue;
  152.         }
  153.  
  154.         for (const auto& [document_id, words] : word_freqs_in_document) {
  155.             if (words.count(word)>0)
  156.                 document_to_relevance.erase(document_id);
  157.         }
  158.     }
  159.  
  160.     std::vector<Document> matched_documents;
  161.     for (const auto [document_id, relevance] : document_to_relevance) {
  162.         matched_documents.push_back(
  163.             { document_id, relevance, documents_.at(document_id).rating });
  164.     }
  165.     return matched_documents;
  166. }
  167.  
  168. void MatchDocuments(const SearchServer& search_server, const std::string& raw_query);
  169. void FindTopDocuments(const SearchServer& search_server, const std::string& raw_query);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement