Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "search_server.h"
- #include <math.h>
- #include <stdexcept>
- #include "log_duration.h"
- SearchServer::SearchServer(const std::string& stop_words_text)
- : SearchServer(
- SplitIntoWords(stop_words_text)) // Invoke delegating constructor from string container
- {
- }
- void SearchServer::AddDocument(int document_id, const std::string& document, DocumentStatus status,
- const std::vector<int>& ratings) {
- if ((document_id < 0) || (documents_.count(document_id) > 0)) {
- throw std::invalid_argument("Invalid document_id"s);
- }
- const auto words = SplitIntoWordsNoStop(document);
- std::set<std::string> ndoc(words.begin(), words.end());
- for (const auto& [id, words] : word_freqs_in_document) {
- std::set<std::string> doc;
- for (std::map<std::string, double>::const_iterator it = words.begin(); it != words.end(); ++it)
- doc.insert(it->first);
- if (doc == ndoc) {
- duplicates.push_back(document_id);
- break;
- }
- }
- const double inv_word_count = 1.0 / words.size();
- for (const std::string& word : words) {
- word_freqs_in_document[document_id][word] += inv_word_count;
- }
- documents_.emplace(document_id, DocumentData{ ComputeAverageRating(ratings), status });
- document_ids_.push_back(document_id);
- }
- std::vector<Document> SearchServer::FindTopDocuments(const std::string& raw_query, DocumentStatus status) const {
- return FindTopDocuments(
- raw_query, [status](int document_id, DocumentStatus document_status, int rating) {
- return document_status == status;
- });
- }
- std::vector<Document> SearchServer::FindTopDocuments(const std::string& raw_query) const {
- return FindTopDocuments(raw_query, DocumentStatus::ACTUAL);
- }
- std::tuple<std::vector<std::string>, DocumentStatus> SearchServer::MatchDocument(const std::string& raw_query,
- int document_id) const {
- const auto query = ParseQuery(raw_query);
- std::vector<std::string> matched_words;
- if (word_freqs_in_document.count(document_id) != 0) {
- for (const std::string& word : query.plus_words) {
- if (word_freqs_in_document.at(document_id).count(word) == 0)
- continue;
- matched_words.push_back(word);
- }
- for (const std::string& word : query.minus_words) {
- if (word_freqs_in_document.at(document_id).count(word) == 0)
- continue;
- matched_words.clear();
- break;
- }
- return { matched_words, documents_.at(document_id).status };
- }
- }
- const std::map<std::string, double>& SearchServer::GetWordFrequencies(int document_id) const
- {
- static std::map<std::string, double> empty;
- if (word_freqs_in_document.count(document_id) == 0) {
- return empty;
- }
- return word_freqs_in_document.at(document_id);
- }
- void SearchServer::RemoveDocument(int document_id)
- {
- word_freqs_in_document.erase(document_id);
- documents_.erase(document_id);
- document_ids_.erase((std::remove(document_ids_.begin(), document_ids_.end(), document_id)),document_ids_.end());
- }
- std::vector<std::string> SearchServer::SplitIntoWordsNoStop(const std::string& text) const {
- std::vector<std::string> words;
- for (const std::string& word : SplitIntoWords(text)) {
- if (!IsValidWord(word)) {
- throw std::invalid_argument("Word "s + word + " is invalid"s);
- }
- if (!IsStopWord(word)) {
- words.push_back(word);
- }
- }
- return words;
- }
- int SearchServer::ComputeAverageRating(const std::vector<int>& ratings) {
- if (ratings.empty()) {
- return 0;
- }
- int rating_sum = 0;
- for (const int rating : ratings) {
- rating_sum += rating;
- }
- return rating_sum / static_cast<int>(ratings.size());
- }
- SearchServer::QueryWord SearchServer::ParseQueryWord(const std::string& text) const {
- if (text.empty()) {
- throw std::invalid_argument("Query word is empty"s);
- }
- std::string word = text;
- bool is_minus = false;
- if (word[0] == '-') {
- is_minus = true;
- word = word.substr(1);
- }
- if (word.empty() || word[0] == '-' || !IsValidWord(word)) {
- throw std::invalid_argument("Query word "s + text + " is invalid");
- }
- return { word, is_minus, IsStopWord(word) };
- }
- SearchServer::Query SearchServer::ParseQuery(const std::string& text) const {
- Query result;
- for (const std::string& word : SplitIntoWords(text)) {
- const auto query_word = ParseQueryWord(word);
- if (!query_word.is_stop) {
- if (query_word.is_minus) {
- result.minus_words.insert(query_word.data);
- }
- else {
- result.plus_words.insert(query_word.data);
- }
- }
- }
- return result;
- }
- int SearchServer::GetDocumentCount() const {
- return static_cast<int>(documents_.size());
- }
- std::vector<int>::const_iterator SearchServer::begin() const
- {
- return document_ids_.begin();
- }
- std::vector<int>::const_iterator SearchServer::end() const
- {
- return document_ids_.end();
- }
- bool SearchServer::IsStopWord(const std::string& word) const {
- return stop_words_.count(word) > 0;
- }
- bool SearchServer::IsValidWord(const std::string& word) {
- // A valid word must not contain special characters
- return std::none_of(word.begin(), word.end(), [](char c) {
- return c >= '\0' && c < ' ';
- });
- }
- double SearchServer::ComputeWordInverseDocumentFreq(const std::string& word) const {
- int freq = 0;
- for (const auto [id, doc] : word_freqs_in_document) {
- if (doc.count(word) > 0)
- freq++;
- }
- return log(GetDocumentCount() * 1.0 / freq);
- }
- void MatchDocuments(const SearchServer& search_server, const std::string& raw_query) {
- LOG_DURATION_STREAM("MatchDocuments time", std::cout);
- for (int document_id = 0; document_id < search_server.GetDocumentCount(); ++document_id) {
- try {
- const auto [words, status] =
- search_server.MatchDocument(raw_query, document_id);
- std::cout << "{ document_id = "s << document_id << ", status = "s << static_cast<int>(status) << ", words = ";
- for (std::string word : words)
- std::cout << word << " ";
- std::cout << "}\n";
- }
- catch (const std::out_of_range& ex) {
- std::cout << "{ document_id = "s << document_id << ", there is no document with this id"s << std::endl;
- }
- }
- }
- void FindTopDocuments(const SearchServer& search_server, const std::string& raw_query) {
- LOG_DURATION_STREAM("FindTopDocuments time", std::cout);
- auto documents = search_server.FindTopDocuments(raw_query);
- for (const Document& document : documents)
- std::cout << document << std::endl;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement