Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <algorithm>
- #include <cassert>
- #include <iostream>
- #include <set>
- #include <map>
- #include <string>
- #include <utility>
- #include <vector>
- #include <cmath>
- #include <numeric>
- using namespace std;
// Upper bound on how many documents a FindTopDocuments call hands back.
constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;

// Relevance values closer to each other than this are ranked as equal
// (the tie is then broken by rating).
constexpr double EPSILON = 1e-6;
// Streams a pair as "first: second" and returns the stream so calls can be chained.
template<typename First, typename Second>
ostream& operator<<(ostream& out, const pair<First, Second>& v) {
    return out << v.first << ": " << v.second;
}
// Writes every element of `container` to `out`, separating neighbours with ", ".
// Nothing is written for an empty container, and no separator precedes the
// first element or follows the last one.
template<typename Element>
void Print(ostream& out, const Element& container) {
    auto current = begin(container);
    const auto finish = end(container);
    if (current == finish) {
        return;
    }
    out << *current;
    for (++current; current != finish; ++current) {
        out << ", "s;
        out << *current;
    }
}
// Streams a vector as "[e1, e2, ...]"; the elements are written by Print.
template<typename T>
ostream& operator<<(ostream& out, const vector<T>& v) {
    Print(out << "[", v);
    return out << "]";
}
// Streams a set as "{e1, e2, ...}"; the elements are written by Print.
template<typename T>
ostream& operator<<(ostream& out, const set<T>& v) {
    Print(out << "{", v);
    return out << "}";
}
// Streams a map as "{k1: v1, k2: v2, ...}"; the entries are written by Print,
// which in turn relies on the pair overload of operator<<.
template<typename Key, typename Value>
ostream& operator<<(ostream& out, const map<Key, Value>& m) {
    Print(out << "{", m);
    return out << "}";
}
// Reads one line from standard input and returns it without the trailing
// newline. When nothing can be read the returned string is empty.
string ReadLine() {
    string line;
    getline(cin, line);
    return line;
}
- int ReadLineWithNumber() {
- int result = 0;
- cin >> result;
- ReadLine();
- return result;
- }
// Status attached to a document when it is added to the search server.
// Searches that do not name a status explicitly look at ACTUAL documents only.
// The numeric values are what PrintMatchDocumentResult prints.
enum class DocumentStatus {
    ACTUAL = 0,
    IRRELEVANT = 1,
    BANNED = 2,
    REMOVED = 3
};
// Splits `text` into words using the space character ' ' as the only
// separator. Runs of spaces, as well as leading and trailing spaces, never
// produce empty words. Any other character (tabs included) is part of a word.
vector<string> SplitIntoWords(const string& text) {
    vector<string> tokens;
    const size_t length = text.size();
    size_t cursor = 0;
    while (cursor < length) {
        // Skip the separators in front of the next word.
        while (cursor < length && text[cursor] == ' ') {
            ++cursor;
        }
        const size_t word_start = cursor;
        // Advance to the end of the word.
        while (cursor < length && text[cursor] != ' ') {
            ++cursor;
        }
        if (cursor > word_start) {
            tokens.push_back(text.substr(word_start, cursor - word_start));
        }
    }
    return tokens;
}
// One search result: the document id together with the relevance computed for
// the query and the document's stored rating.
struct Document {
    int id = 0;
    double relevance = 0.0;
    int rating = 0;

    // Leaves every field at its zero default.
    Document() = default;

    // Fills all three fields from the given values.
    Document(int id1, double relevance1, int rating1)
        : id{id1}
        , relevance{relevance1}
        , rating{rating1} {
    }
};
// Tells whether the index `word_to_documents` lists document `id` under `word`.
// Returns false when the word is absent from the index altogether.
bool FindWord(const string& word, const map<string, set<int>>& word_to_documents, const int& id) {
    const auto entry = word_to_documents.find(word);
    if (entry == word_to_documents.end()) {
        return false;
    }
    return entry->second.count(id) > 0;
}
- void PrintMatchDocumentResult(int document_id, const vector<string>& words, DocumentStatus status) {
- cout << "{ "s
- << "document_id = "s << document_id << ", "s
- << "status = "s << static_cast<int>(status) << ", "s
- << "words ="s;
- for (const string& word : words) {
- cout << ' ' << word;
- }
- cout << "}"s << endl;
- }
- class SearchServer {
- public:
- inline static constexpr int INVALID_DOCUMENT_ID = -1;
- int GetDocumentId(int index) const {
- if (index < documents_id_.size() && index >= 0) {
- return documents_id_[index];
- }
- return SearchServer::INVALID_DOCUMENT_ID;
- }
- SearchServer() = default;
- explicit SearchServer(const string& stop_words) {
- SetStopWords(stop_words);
- }
- template<typename StringCollection>
- SearchServer(const StringCollection& stop_words) {
- for (const auto& word : stop_words) {
- if (!word.empty()) {
- stop_words_.insert(word);
- }
- }
- }
- void SetStopWords(const string& text) {
- for (const string& word : SplitIntoWords(text)) {
- stop_words_.insert(word);
- }
- }
- [[nodiscard]] bool AddDocument(int document_id, const string& document, DocumentStatus status, const vector<int>& ratings) {
- for (const auto& w : SplitIntoWords(document)) {
- if (IsValidWord(w)) {
- continue;
- }
- else {
- return false;
- }
- }
- const vector<string> words = SplitIntoWordsNoStop(document);
- double count_word = 0.0;
- for (const auto& w : words) {
- count_word = count(words.begin(), words.end(), w);
- word_to_documents_[w].insert(document_id);
- word_to_document_freqs_[w].insert({ document_id, count_word / words.size() });
- }
- if (document_id < 0) {
- return false;
- }
- if (count(begin(documents_id_), end(documents_id_), document_id) > 0) {
- return false;
- }
- else {
- documents_id_.push_back(document_id);
- sort(begin(documents_id_), end(documents_id_));
- document_rating_[document_id] = ComputeAverageRating(ratings);
- document_status_[document_id] = status;
- }
- ++documents_count_;
- return true;
- }
- template<typename DocumentPredicate>
- [[nodiscard]] bool FindTopDocuments(const string& raw_query, DocumentPredicate document_predicate, vector<Document>& result) const {
- for (const auto& w : SplitIntoWords(raw_query)) {
- if (!IsValidWord(w)) {
- return false;
- }
- }
- const Query query_words = ParseQuery(raw_query);
- for (const auto& word : query_words.minus_words) {
- if (word[0] == '-' || word.empty()) {
- return false;
- }
- }
- auto matched_documents = FindAllDocuments(query_words, document_predicate);
- sort(matched_documents.begin(), matched_documents.end(),
- [](const Document& lhs, const Document& rhs) {
- if (abs(lhs.relevance - rhs.relevance) < EPSILON) {
- return lhs.rating > rhs.rating;
- }
- else {
- return lhs.relevance > rhs.relevance;
- }
- });
- if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
- matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
- }
- result = matched_documents;
- return true;
- }
- [[nodiscard]] bool FindTopDocuments(const string& raw_query, DocumentStatus status, vector<Document>& result) const {
- return FindTopDocuments(raw_query, [status](int document_id, DocumentStatus n_status, int rating)
- { return n_status == status; }, result);
- }
- [[nodiscard]] bool FindTopDocuments(const string& raw_query, vector<Document>& result) const {
- return FindTopDocuments(raw_query, DocumentStatus::ACTUAL, result);
- }
- static int ComputeAverageRating(const vector<int>& ratings) {
- if (ratings.empty()) {
- return 0;
- }
- int document_rating = accumulate(ratings.begin(), ratings.end(), 0);
- return document_rating / static_cast<int>(ratings.size());
- }
- //tuple<vector<string>, DocumentStatus> match_doc;
- [[nodiscard]] bool MatchDocument(const string& raw_query, int document_id, tuple<vector<string>, DocumentStatus>& result) const {
- //исключаю пустой запрос и кривой id
- if (raw_query.empty() || document_id > documents_count_) {
- result = { {}, document_status_.at(document_id) };
- return true;
- }
- Query query;
- query = ParseQuery(raw_query);
- for (const auto& minus : query.minus_words) {
- if (!IsValidWord(minus)) {
- return false;
- }
- if (FindWord(minus, word_to_documents_, document_id)) {
- result = { {}, document_status_.at(document_id) };
- return true;
- }
- }
- vector<string> words;
- for (const auto& plus : query.plus_words) {
- if (!IsValidWord(plus)) {
- return false;
- }
- if (FindWord(plus, word_to_documents_, document_id)) {
- words.push_back(plus);
- }
- }
- sort(words.begin(), words.end());
- result = { words, document_status_.at(document_id) };
- return true;
- };
- int GetDocumentCount() {
- return documents_count_;
- }
- private:
- struct Status { // почему не хранить статус в документе?
- int id;
- double rating;
- DocumentStatus status;
- };
- struct Query {
- set<string> plus_words;
- set<string> minus_words;
- };
- map<string, set<int>> word_to_documents_;
- map<string, map<int, double>> word_to_document_freqs_;
- set<string> stop_words_;
- map<int, int> document_rating_;
- map<int, DocumentStatus> document_status_;
- vector<int> documents_id_;
- int documents_count_ = 0;
- bool IsStopWord(const string& word) const {
- return stop_words_.count(word) > 0;
- }
- vector<string> SplitIntoWordsNoStop(const string& text) const {
- vector<string> words;
- vector<string> split_into_words;
- for (const string& word : SplitIntoWords(text)) {
- if (!IsStopWord(word))
- {
- words.push_back(word);
- }
- }
- return words;
- }
- Query ParseQuery(const string& text) const {
- Query query;
- vector<string> raw_words = SplitIntoWordsNoStop(text);
- for (const string& word : raw_words) {
- if (word[0] == '-')
- {
- query.minus_words.insert(word.substr(1));
- }
- }
- for (const string& min : raw_words) {
- if (min[0] != '-' && !query.minus_words.count(min))
- {
- query.plus_words.insert(min);
- }
- }
- return query;
- }
- // IDF-TF
- template<typename DocumentPredicate>
- vector<Document> FindAllDocuments(const Query& query_words, DocumentPredicate document_predicate) const {
- map<int, double> document_to_relevance;
- for (const auto& plus : query_words.plus_words) {
- if (word_to_documents_.count(plus)) {
- const double IDF = log(static_cast<double>(documents_count_) / word_to_documents_.at(plus).size());
- for (const auto& id : word_to_documents_.at(plus)) {
- if (document_predicate(id, document_status_.at(id), document_rating_.at(id))) {
- document_to_relevance[id] += IDF * word_to_document_freqs_.at(plus).at(id);
- }
- }
- }
- }
- for (const auto& minus : query_words.minus_words) {
- if (word_to_documents_.count(minus)) {
- for (const auto& id : word_to_documents_.at(minus)) {
- document_to_relevance.erase(id);
- };
- }
- }
- vector<Document> matched_documents;
- for (const auto& [id, relevance] : document_to_relevance) {
- matched_documents.push_back({ id, relevance, document_rating_.at(id) });
- }
- return matched_documents;
- }
- static bool IsValidWord(const string& word) {
- // A valid word must not contain special characters
- return none_of(word.begin(), word.end(), [](char c) {
- return c >= '\0' && c < ' ';
- });
- }
- };
/*
Insert your implementation of the ASSERT, ASSERT_EQUAL, ASSERT_EQUAL_HINT,
ASSERT_HINT and RUN_TEST macros here
*/
- //template<typename T, typename U>
- //void AssertEqualImlp(const T& t, const U& u, const string& t_str, const string& u_str,
- // const string& file, const string& func, const int& line, const string& hint) {
- // if (t != u) {
- // cerr << boolalpha;
- // cerr << file << "("s << line << "): "s << func << ": "s;
- // cerr << "ASSERT_EQUAL("s << t_str << ", "s << u_str << ") failed: "s;
- // cerr << t << "!=" << u << "."s;
- // if (!hint.empty()) {
- // cerr << "Hint: "s << hint;
- // }
- // cerr << endl;
- // abort();
- // }
- //}
- //
- //#define ASSERT_EQUAL(a, b) AssertEqualImlp((a), (b), #a, #b, __FILE__, __FUNCTION__, __LINE__, ""s)
- //#define ASSERT_EQUAL_HINT(a, b) AssertEqualImlp((a), (b), #a, #b, __FILE__, __FUNCTION__, __LINE__, (hint))
- //
- //
- //void AssertImlp(bool value, const string& expr_str,
- // const string& file,
- // const string& func,
- // const int& line,
- // const string& hint) {
- // if (!value) {
- // cerr << file << ": "s << "("s << line << ") "s << func << ": "s;
- // cerr << "ASSERT"s << expr_str << ", "s << ") failed: "s << value;
- // if (!hint.empty()) {
- // cerr << "Hint: "s << hint;
- // }
- // cerr << endl;
- // abort();
- // }
- //}
- //
- //#define ASSERT(expr) AssertImlp((expr), #expr, __FILE__, __FUNCTION__, __LINE__, ""s)
- //#define ASSERT_HINT(expr) AssertImlp((expr), #expr, __FILE__, __FUNCTION__, __LINE__, (hint))
// -------- Start of the search server unit tests ----------
// The test checks that the search server excludes stop words when adding documents
// --------- End of the search server unit tests -----------
- void PrintDocument(const Document& document) {
- cout << "{ "s
- << "document_id = "s << document.id << ", "s
- << "relevance = "s << document.relevance << ", "s
- << "rating = "s << document.rating
- << " }"s << endl;
- }
- int main() {
- //TestSearchServer();
- // Если вы видите эту строку, значит все тесты прошли успешно
- //cout << "Search server testing finished"s << endl;
- SearchServer search_server("и в на"s);
- // Явно игнорируем результат метода AddDocument, чтобы избежать предупреждения
- // о неиспользуемом результате его вызова
- (void)search_server.AddDocument(1, "пуш-истый кот пушистый хвост"s, DocumentStatus::ACTUAL, { 7, 2, 7 });
- if (!search_server.AddDocument(1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, { 1, 2 })) {
- cout << "Документ не был добавлен, так как его id совпадает с уже имеющимся"s << endl;
- }
- if (!search_server.AddDocument(-1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, { 1, 2 })) {
- cout << "Документ не был добавлен, так как его id отрицательный"s << endl;
- }
- if (!search_server.AddDocument(3, "большой пёс скво\x12рец"s, DocumentStatus::ACTUAL, { 1, 3, 2 })) {
- cout << "Документ не был добавлен, так как содержит спецсимволы"s << endl;
- }
- vector<Document> documents;
- if (search_server.FindTopDocuments("--пушистый"s, documents)) {
- for (const Document& document : documents) {
- PrintDocument(document);
- }
- }
- else {
- cout << "Ошибка в поисковом запросе"s << endl;
- }
- {
- vector<Document> documents;
- if (search_server.FindTopDocuments("кот -"s, documents)) {
- for (const Document& document : documents) {
- PrintDocument(document);
- }
- }
- else {
- cout << "Ошибка в поисковом запросе11"s << endl;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement