SHOW:
|
|
- or go back to the newest paste.
| 1 | #include <algorithm> | |
| 2 | #include <cmath> | |
| 3 | #include <iostream> | |
| 4 | #include <map> | |
| 5 | #include <set> | |
| 6 | #include <string> | |
| 7 | #include <utility> | |
| 8 | #include <vector> | |
| 9 | ||
| 10 | using namespace std; | |
| 11 | ||
| 12 | const int MAX_RESULT_DOCUMENT_COUNT = 5; | |
| 13 | ||
| 14 | string ReadLine() {
| |
| 15 | string s; | |
| 16 | getline(cin, s); | |
| 17 | return s; | |
| 18 | } | |
| 19 | ||
| 20 | int ReadLineWithNumber() {
| |
| 21 | int result; | |
| 22 | cin >> result; | |
| 23 | ReadLine(); | |
| 24 | return result; | |
| 25 | } | |
| 26 | ||
| 27 | vector<string> SplitIntoWords(const string& text) {
| |
| 28 | vector<string> words; | |
| 29 | string word; | |
| 30 | for (const char c : text) {
| |
| 31 | if (c == ' ') {
| |
| 32 | if (!word.empty()) {
| |
| 33 | words.push_back(word); | |
| 34 | word.clear(); | |
| 35 | } | |
| 36 | } else {
| |
| 37 | word += c; | |
| 38 | } | |
| 39 | } | |
| 40 | if (!word.empty()) {
| |
| 41 | words.push_back(word); | |
| 42 | } | |
| 43 | ||
| 44 | return words; | |
| 45 | } | |
| 46 | ||
| 47 | struct Document {
| |
| 48 | Document() = default; | |
| 49 | ||
| 50 | Document(int id, double relevance, int rating) | |
| 51 | : id(id) | |
| 52 | , relevance(relevance) | |
| 53 | , rating(rating) {
| |
| 54 | } | |
| 55 | ||
| 56 | int id = 0; | |
| 57 | double relevance = 0.0; | |
| 58 | int rating = 0; | |
| 59 | }; | |
| 60 | ||
| 61 | template <typename StringContainer> | |
| 62 | set<string> MakeUniqueNonEmptyStrings(const StringContainer& strings) {
| |
| 63 | set<string> non_empty_strings; | |
| 64 | for (const string& str : strings) {
| |
| 65 | if (!str.empty()) {
| |
| 66 | non_empty_strings.insert(str); | |
| 67 | } | |
| 68 | } | |
| 69 | return non_empty_strings; | |
| 70 | } | |
| 71 | ||
| 72 | enum class DocumentStatus {
| |
| 73 | ACTUAL, | |
| 74 | IRRELEVANT, | |
| 75 | BANNED, | |
| 76 | REMOVED, | |
| 77 | }; | |
| 78 | ||
| 79 | class SearchServer {
| |
| 80 | public: | |
| 81 | ||
| 82 | inline static constexpr int INVALID_DOCUMENT_ID = -1; | |
| 83 | ||
| 84 | template <typename StringContainer> | |
| 85 | explicit SearchServer(const StringContainer& stop_words) | |
| 86 | : stop_words_(MakeUniqueNonEmptyStrings(stop_words)) {
| |
| 87 | } | |
| 88 | ||
| 89 | explicit SearchServer(const string& stop_words_text) | |
| 90 | : SearchServer( | |
| 91 | SplitIntoWords(stop_words_text)) // Invoke delegating constructor from string container | |
| 92 | {
| |
| 93 | } | |
| 94 | ||
| 95 | [[nodiscard]] bool AddDocument(int document_id, const string& document, DocumentStatus status, | |
| 96 | const vector<int>& ratings) {
| |
| 97 | if (document_id < 0 || documents_.count(document_id) || IsValidWord(document)==false) {
| |
| 98 | return false; | |
| 99 | }else{
| |
| 100 | const vector<string> words = SplitIntoWordsNoStop(document); | |
| 101 | const double inv_word_count = 1.0 / words.size(); | |
| 102 | for (const string& word : words) {
| |
| 103 | word_to_document_freqs_[word][document_id] += inv_word_count; | |
| 104 | } | |
| 105 | documents_.emplace(document_id, DocumentData{ComputeAverageRating(ratings), status});
| |
| 106 | document_ids.push_back(document_id); | |
| 107 | return true; | |
| 108 | } | |
| 109 | } | |
| 110 | ||
| 111 | template <typename DocumentPredicate> | |
| 112 | [[nodiscard]] bool FindTopDocuments(const string& raw_query, | |
| 113 | DocumentPredicate document_predicate, vector<Document>& result) const {
| |
| 114 | if(IsValidQuery(raw_query)==false){
| |
| 115 | return false; | |
| 116 | }else{
| |
| 117 | const Query query = ParseQuery(raw_query); | |
| 118 | auto matched_documents = FindAllDocuments(query, document_predicate); | |
| 119 | sort(matched_documents.begin(), matched_documents.end(), | |
| 120 | [](const Document& lhs, const Document& rhs) {
| |
| 121 | if (abs(lhs.relevance - rhs.relevance) < 1e-6) {
| |
| 122 | return lhs.rating > rhs.rating; | |
| 123 | } else {
| |
| 124 | return lhs.relevance > rhs.relevance; | |
| 125 | } | |
| 126 | }); | |
| 127 | if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
| |
| 128 | matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT); | |
| 129 | } | |
| 130 | result = matched_documents; | |
| 131 | return true; | |
| 132 | } | |
| 133 | } | |
| 134 | ||
| 135 | [[nodiscard]] bool FindTopDocuments(const string& raw_query, DocumentStatus status, vector<Document>& result) const {
| |
| 136 | if(IsValidQuery(raw_query)==false) {
| |
| 137 | return false; | |
| 138 | }else{
| |
| 139 | return FindTopDocuments( | |
| 140 | raw_query, [status](int document_id, DocumentStatus document_status, int rating) {
| |
| 141 | return document_status == status; | |
| 142 | }, result); | |
| 143 | } | |
| 144 | } | |
| 145 | ||
| 146 | [[nodiscard]] bool FindTopDocuments(const string& raw_query, vector<Document>& result) const {
| |
| 147 | if(IsValidQuery(raw_query)==false) {
| |
| 148 | return false; | |
| 149 | }else{
| |
| 150 | return FindTopDocuments(raw_query, DocumentStatus::ACTUAL, result); | |
| 151 | } | |
| 152 | } | |
| 153 | ||
| 154 | int GetDocumentCount() const {
| |
| 155 | return documents_.size(); | |
| 156 | } | |
| 157 | ||
| 158 | [[nodiscard]] bool MatchDocument(const string& raw_query, | |
| 159 | int document_id, tuple<vector<string>, DocumentStatus>& result) const {
| |
| 160 | if(IsValidQuery(raw_query)==false){
| |
| 161 | return false; | |
| 162 | }else{
| |
| 163 | const Query query = ParseQuery(raw_query); | |
| 164 | vector<string> matched_words; | |
| 165 | for (const string& word : query.plus_words) {
| |
| 166 | if (word_to_document_freqs_.count(word) == 0) {
| |
| 167 | continue; | |
| 168 | } | |
| 169 | if (word_to_document_freqs_.at(word).count(document_id)) {
| |
| 170 | matched_words.push_back(word); | |
| 171 | } | |
| 172 | } | |
| 173 | for (const string& word : query.minus_words) {
| |
| 174 | if (word_to_document_freqs_.count(word) == 0) {
| |
| 175 | continue; | |
| 176 | } | |
| 177 | if (word_to_document_freqs_.at(word).count(document_id)) {
| |
| 178 | matched_words.clear(); | |
| 179 | break; | |
| 180 | } | |
| 181 | } | |
| 182 | result = {matched_words, documents_.at(document_id).status};
| |
| 183 | return true; | |
| 184 | } | |
| 185 | } | |
| 186 | ||
| 187 | int GetDocumentId(const int index) const {
| |
| 188 | if(index<0 || !(index<GetDocumentCount())){
| |
| 189 | return SearchServer::INVALID_DOCUMENT_ID; | |
| 190 | } | |
| 191 | return document_ids[index]; | |
| 192 | } | |
| 193 | ||
| 194 | private: | |
| 195 | struct DocumentData {
| |
| 196 | int rating; | |
| 197 | DocumentStatus status; | |
| 198 | }; | |
| 199 | const set<string> stop_words_; | |
| 200 | map<string, map<int, double>> word_to_document_freqs_; | |
| 201 | map<int, DocumentData> documents_; | |
| 202 | vector<int> document_ids; | |
| 203 | ||
| 204 | bool IsStopWord(const string& word) const {
| |
| 205 | return stop_words_.count(word) > 0; | |
| 206 | } | |
| 207 | ||
| 208 | vector<string> SplitIntoWordsNoStop(const string& text) const {
| |
| 209 | vector<string> words; | |
| 210 | for (const string& word : SplitIntoWords(text)) {
| |
| 211 | if (!IsStopWord(word)) {
| |
| 212 | words.push_back(word); | |
| 213 | } | |
| 214 | } | |
| 215 | return words; | |
| 216 | } | |
| 217 | ||
| 218 | static int ComputeAverageRating(const vector<int>& ratings) {
| |
| 219 | if (ratings.empty()) {
| |
| 220 | return 0; | |
| 221 | } | |
| 222 | int rating_sum = 0; | |
| 223 | for (const int rating : ratings) {
| |
| 224 | rating_sum += rating; | |
| 225 | } | |
| 226 | return rating_sum / static_cast<int>(ratings.size()); | |
| 227 | } | |
| 228 | ||
| 229 | struct QueryWord {
| |
| 230 | string data; | |
| 231 | bool is_minus; | |
| 232 | bool is_stop; | |
| 233 | }; | |
| 234 | ||
| 235 | QueryWord ParseQueryWord(string text) const {
| |
| 236 | bool is_minus = false; | |
| 237 | QueryWord result; | |
| 238 | // Word shouldn't be empty | |
| 239 | if (text[0] == '-') {
| |
| 240 | is_minus = true; | |
| 241 | text = text.substr(1); | |
| 242 | } | |
| 243 | result = {text, is_minus, IsStopWord(text)};
| |
| 244 | return result; | |
| 245 | } | |
| 246 | ||
| 247 | struct Query {
| |
| 248 | set<string> plus_words; | |
| 249 | set<string> minus_words; | |
| 250 | }; | |
| 251 | ||
| 252 | Query ParseQuery(const string& text) const {
| |
| 253 | Query query; | |
| 254 | for (const string& word : SplitIntoWords(text)) {
| |
| 255 | const QueryWord query_word = ParseQueryWord(word); | |
| 256 | if (!query_word.is_stop) {
| |
| 257 | if (query_word.is_minus) {
| |
| 258 | query.minus_words.insert(query_word.data); | |
| 259 | } else {
| |
| 260 | query.plus_words.insert(query_word.data); | |
| 261 | } | |
| 262 | } | |
| 263 | } | |
| 264 | return query; | |
| 265 | } | |
| 266 | ||
| 267 | // Existence required | |
| 268 | double ComputeWordInverseDocumentFreq(const string& word) const {
| |
| 269 | return log(GetDocumentCount() * 1.0 / word_to_document_freqs_.at(word).size()); | |
| 270 | } | |
| 271 | ||
| 272 | template <typename DocumentPredicate> | |
| 273 | vector<Document> FindAllDocuments(const Query& query, | |
| 274 | DocumentPredicate document_predicate) const {
| |
| 275 | map<int, double> document_to_relevance; | |
| 276 | for (const string& word : query.plus_words) {
| |
| 277 | if (word_to_document_freqs_.count(word) == 0) {
| |
| 278 | continue; | |
| 279 | } | |
| 280 | const double inverse_document_freq = ComputeWordInverseDocumentFreq(word); | |
| 281 | for (const auto [document_id, term_freq] : word_to_document_freqs_.at(word)) {
| |
| 282 | const auto& document_data = documents_.at(document_id); | |
| 283 | if (document_predicate(document_id, document_data.status, document_data.rating)) {
| |
| 284 | document_to_relevance[document_id] += term_freq * inverse_document_freq; | |
| 285 | } | |
| 286 | } | |
| 287 | } | |
| 288 | ||
| 289 | for (const string& word : query.minus_words) {
| |
| 290 | if (word_to_document_freqs_.count(word) == 0) {
| |
| 291 | continue; | |
| 292 | } | |
| 293 | for (const auto [document_id, _] : word_to_document_freqs_.at(word)) {
| |
| 294 | document_to_relevance.erase(document_id); | |
| 295 | } | |
| 296 | } | |
| 297 | ||
| 298 | vector<Document> matched_documents; | |
| 299 | for (const auto [document_id, relevance] : document_to_relevance) {
| |
| 300 | matched_documents.push_back( | |
| 301 | {document_id, relevance, documents_.at(document_id).rating});
| |
| 302 | } | |
| 303 | return matched_documents; | |
| 304 | } | |
| 305 | ||
| 306 | static bool IsValidWord(const string& word) {
| |
| 307 | return none_of(word.begin(), word.end(), [](char c) {
| |
| 308 | return c >= '\0' && c < ' '; | |
| 309 | }); | |
| 310 | } | |
| 311 | ||
| 312 | static bool IsValidQuery(const string& raw_query) {
| |
| 313 | if(IsValidWord(raw_query)==false) {
| |
| 314 | return false; | |
| 315 | } | |
| 316 | for (int i = 0; i < raw_query.size(); ++i) {
| |
| 317 | if (raw_query[i] == '-' || raw_query[raw_query.size()-1]=='-') {
| |
| 318 | if (raw_query[i + 1] == '-' || raw_query[i + 1] == ' ') {
| |
| 319 | return false; | |
| 320 | } | |
| 321 | } | |
| 322 | } | |
| 323 | return true; | |
| 324 | } | |
| 325 | }; | |
| 326 | ||
| 327 | // ==================== для примера ========================= | |
| 328 | ||
| 329 | void PrintDocument(const Document& document) {
| |
| 330 | cout << "{ "s
| |
| 331 | << "document_id = "s << document.id << ", "s | |
| 332 | << "relevance = "s << document.relevance << ", "s | |
| 333 | << "rating = "s << document.rating << " }"s << endl; | |
| 334 | } | |
| 335 | int main() {
| |
| 336 | SearchServer search_server("и в на"s);
| |
| 337 | // Явно игнорируем результат метода AddDocument, чтобы избежать предупреждения | |
| 338 | // о неиспользуемом результате его вызова | |
| 339 | (void) search_server.AddDocument(1, "пушистый кот пушистый хвост"s, DocumentStatus::ACTUAL, {7, 2, 7});
| |
| 340 | if (!search_server.AddDocument(1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, {1, 2})) {
| |
| 341 | cout << "Документ не был добавлен, так как его id совпадает с уже имеющимся"s << endl; | |
| 342 | } | |
| 343 | if (!search_server.AddDocument(-1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, {1, 2})) {
| |
| 344 | cout << "Документ не был добавлен, так как его id отрицательный"s << endl; | |
| 345 | } | |
| 346 | if (!search_server.AddDocument(3, "большой пёс скво\x12рец"s, DocumentStatus::ACTUAL, {1, 3, 2})) {
| |
| 347 | cout << "Документ не был добавлен, так как содержит спецсимволы"s << endl; | |
| 348 | } | |
| 349 | vector<Document> documents; | |
| 350 | if (search_server.FindTopDocuments("пушистый -"s, documents)) {
| |
| 351 | for (const Document& document : documents) {
| |
| 352 | PrintDocument(document); | |
| 353 | } | |
| 354 | } else {
| |
| 355 | cout << "Ошибка в поисковом запросе"s << endl; | |
| 356 | } | |
| 357 | } |