Advertisement
force1987

search_server.cpp

Mar 3rd, 2023
900
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.86 KB | None | 0 0
  #include "search_server.h"

  #include <math.h>

  #include <algorithm>
  #include <cmath>
  #include <stdexcept>

  #include "log_duration.h"
  5.  
  6. SearchServer::SearchServer(const std::string& stop_words_text)
  7.     : SearchServer(
  8.         SplitIntoWords(stop_words_text))  // Invoke delegating constructor from string container
  9. {
  10. }
  11.  
  12. void SearchServer::AddDocument(int document_id, const std::string& document, DocumentStatus status,
  13.     const std::vector<int>& ratings) {
  14.     if ((document_id < 0) || (documents_.count(document_id) > 0)) {
  15.         throw std::invalid_argument("Invalid document_id"s);
  16.     }
  17.     const auto words = SplitIntoWordsNoStop(document);
  18.     std::set<std::string> ndoc(words.begin(), words.end());
  19.     for (const auto& [id, words] : word_freqs_in_document) {
  20.         std::set<std::string> doc;
  21.         for (std::map<std::string, double>::const_iterator it = words.begin(); it != words.end(); ++it)
  22.             doc.insert(it->first);
  23.         if (doc == ndoc) {
  24.             duplicates.push_back(document_id);
  25.             break;
  26.         }
  27.     }
  28.     const double inv_word_count = 1.0 / words.size();
  29.     for (const std::string& word : words) {
  30.         word_freqs_in_document[document_id][word] += inv_word_count;
  31.     }
  32.     documents_.emplace(document_id, DocumentData{ ComputeAverageRating(ratings), status });
  33.     document_ids_.push_back(document_id);
  34. }
  35.  
  36. std::vector<Document> SearchServer::FindTopDocuments(const std::string& raw_query, DocumentStatus status) const {
  37.     return FindTopDocuments(
  38.         raw_query, [status](int document_id, DocumentStatus document_status, int rating) {
  39.             return document_status == status;
  40.         });
  41. }
  42.  
  43. std::vector<Document> SearchServer::FindTopDocuments(const std::string& raw_query) const {
  44.     return FindTopDocuments(raw_query, DocumentStatus::ACTUAL);
  45. }
  46.  
  47. std::tuple<std::vector<std::string>, DocumentStatus> SearchServer::MatchDocument(const std::string& raw_query,
  48.     int document_id) const {
  49.     const auto query = ParseQuery(raw_query);
  50.  
  51.     std::vector<std::string> matched_words;
  52.     if (word_freqs_in_document.count(document_id) != 0) {
  53.         for (const std::string& word : query.plus_words) {
  54.             if (word_freqs_in_document.at(document_id).count(word) == 0)
  55.                 continue;
  56.             matched_words.push_back(word);
  57.         }
  58.         for (const std::string& word : query.minus_words) {
  59.             if (word_freqs_in_document.at(document_id).count(word) == 0)
  60.                 continue;
  61.             matched_words.clear();
  62.             break;
  63.         }
  64.         return { matched_words, documents_.at(document_id).status };
  65.     }
  66. }
  67.    
  68.    
  69.  
  70. const std::map<std::string, double>& SearchServer::GetWordFrequencies(int document_id) const
  71. {
  72.     static std::map<std::string, double> empty;
  73.     if (word_freqs_in_document.count(document_id) == 0) {
  74.         return empty;
  75.     }
  76.     return word_freqs_in_document.at(document_id);
  77. }
  78.  
  79. void SearchServer::RemoveDocument(int document_id)
  80. {
  81.     word_freqs_in_document.erase(document_id);
  82.     documents_.erase(document_id);
  83.     document_ids_.erase((std::remove(document_ids_.begin(), document_ids_.end(), document_id)),document_ids_.end());
  84. }
  85.  
  86. std::vector<std::string> SearchServer::SplitIntoWordsNoStop(const std::string& text) const {
  87.     std::vector<std::string> words;
  88.     for (const std::string& word : SplitIntoWords(text)) {
  89.         if (!IsValidWord(word)) {
  90.             throw std::invalid_argument("Word "s + word + " is invalid"s);
  91.         }
  92.         if (!IsStopWord(word)) {
  93.             words.push_back(word);
  94.         }
  95.     }
  96.     return words;
  97. }
  98.  
  99. int SearchServer::ComputeAverageRating(const std::vector<int>& ratings) {
  100.     if (ratings.empty()) {
  101.         return 0;
  102.     }
  103.     int rating_sum = 0;
  104.     for (const int rating : ratings) {
  105.         rating_sum += rating;
  106.     }
  107.     return rating_sum / static_cast<int>(ratings.size());
  108. }
  109.  
  110. SearchServer::QueryWord SearchServer::ParseQueryWord(const std::string& text) const {
  111.     if (text.empty()) {
  112.         throw std::invalid_argument("Query word is empty"s);
  113.     }
  114.     std::string word = text;
  115.     bool is_minus = false;
  116.     if (word[0] == '-') {
  117.         is_minus = true;
  118.         word = word.substr(1);
  119.     }
  120.     if (word.empty() || word[0] == '-' || !IsValidWord(word)) {
  121.         throw std::invalid_argument("Query word "s + text + " is invalid");
  122.     }
  123.  
  124.     return { word, is_minus, IsStopWord(word) };
  125. }
  126.  
  127. SearchServer::Query SearchServer::ParseQuery(const std::string& text) const {
  128.     Query result;
  129.     for (const std::string& word : SplitIntoWords(text)) {
  130.         const auto query_word = ParseQueryWord(word);
  131.         if (!query_word.is_stop) {
  132.             if (query_word.is_minus) {
  133.                 result.minus_words.insert(query_word.data);
  134.             }
  135.             else {
  136.                 result.plus_words.insert(query_word.data);
  137.             }
  138.         }
  139.     }
  140.     return result;
  141. }
  142.  
  143. int SearchServer::GetDocumentCount() const {
  144.     return static_cast<int>(documents_.size());
  145. }
  146.  
  147. std::vector<int>::const_iterator SearchServer::begin() const
  148. {
  149.     return document_ids_.begin();
  150. }
  151.  
  152. std::vector<int>::const_iterator SearchServer::end() const
  153. {
  154.     return document_ids_.end();
  155. }
  156.  
  157. bool SearchServer::IsStopWord(const std::string& word) const {
  158.     return stop_words_.count(word) > 0;
  159. }
  160.  
  161. bool SearchServer::IsValidWord(const std::string& word) {
  162.     // A valid word must not contain special characters
  163.     return std::none_of(word.begin(), word.end(), [](char c) {
  164.         return c >= '\0' && c < ' ';
  165.         });
  166. }
  167.  
  168. double SearchServer::ComputeWordInverseDocumentFreq(const std::string& word) const {
  169.     int freq = 0;
  170.     for (const auto [id, doc] : word_freqs_in_document) {
  171.         if (doc.count(word) > 0)
  172.             freq++;
  173.     }
  174.     return log(GetDocumentCount() * 1.0 / freq);
  175. }
  176.  
  177. void MatchDocuments(const SearchServer& search_server, const std::string& raw_query) {
  178.     LOG_DURATION_STREAM("MatchDocuments time", std::cout);
  179.     for (int document_id = 0; document_id < search_server.GetDocumentCount(); ++document_id) {
  180.         try {
  181.             const auto [words, status] =
  182.                 search_server.MatchDocument(raw_query, document_id);
  183.             std::cout << "{ document_id = "s << document_id << ", status = "s << static_cast<int>(status) << ", words = ";
  184.             for (std::string word : words)
  185.                 std::cout << word << " ";
  186.             std::cout << "}\n";
  187.         }
  188.         catch (const std::out_of_range& ex) {
  189.             std::cout << "{ document_id = "s << document_id << ", there is no document with this id"s << std::endl;
  190.         }
  191.     }
  192. }
  193.  
  194. void FindTopDocuments(const SearchServer& search_server, const std::string& raw_query) {
  195.     LOG_DURATION_STREAM("FindTopDocuments time", std::cout);
  196.     auto documents = search_server.FindTopDocuments(raw_query);
  197.     for (const Document& document : documents)
  198.         std::cout << document << std::endl;
  199. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement