View difference between Paste ID: J5hMU5dU and EsV4MpZB
SHOW: | | - or go back to the newest paste.
1
#include <algorithm>
2
#include <cmath>
3
#include <iostream>
4
#include <map>
5
#include <set>
6
#include <string>
7
#include <utility>
8
#include <vector>
9
10
using namespace std;
11
12
const int MAX_RESULT_DOCUMENT_COUNT = 5;
13
14
string ReadLine() {
15
    string s;
16
    getline(cin, s);
17
    return s;
18
}
19
20
int ReadLineWithNumber() {
21
    int result;
22
    cin >> result;
23
    ReadLine();
24
    return result;
25
}
26
27
vector<string> SplitIntoWords(const string& text) {
28
    vector<string> words;
29
    string word;
30
    for (const char c : text) {
31
        if (c == ' ') {
32
            if (!word.empty()) {
33
                words.push_back(word);
34
                word.clear();
35
            }
36
        } else {
37
            word += c;
38
        }
39
    }
40
    if (!word.empty()) {
41
        words.push_back(word);
42
    }
43
44
    return words;
45
}
46
47
struct Document {
48
    Document() = default;
49
50
    Document(int id, double relevance, int rating)
51
        : id(id)
52
        , relevance(relevance)
53
        , rating(rating) {
54
    }
55
56
    int id = 0;
57
    double relevance = 0.0;
58
    int rating = 0;
59
};
60
61
template <typename StringContainer>
62
set<string> MakeUniqueNonEmptyStrings(const StringContainer& strings) {
63
    set<string> non_empty_strings;
64
    for (const string& str : strings) {
65
        if (!str.empty()) {
66
            non_empty_strings.insert(str);
67
        }
68
    }
69
    return non_empty_strings;
70
}
71
72
enum class DocumentStatus {
73
    ACTUAL,
74
    IRRELEVANT,
75
    BANNED,
76
    REMOVED,
77
};
78
79
class SearchServer {
80
public:
81
    
82
    inline static constexpr int INVALID_DOCUMENT_ID = -1;
83
    
84
    template <typename StringContainer>
85
    explicit SearchServer(const StringContainer& stop_words)
86
        : stop_words_(MakeUniqueNonEmptyStrings(stop_words)) {
87
    }
88
89
    explicit SearchServer(const string& stop_words_text)
90
        : SearchServer(
91
            SplitIntoWords(stop_words_text))  // Invoke delegating constructor from string container
92
    {
93
    }
94
95
    [[nodiscard]] bool AddDocument(int document_id, const string& document, DocumentStatus status,
96
                     const vector<int>& ratings) {
97
        if (document_id < 0 || documents_.count(document_id) || IsValidWord(document)==false) {
98
            return false;
99
        }else{
100
            const vector<string> words = SplitIntoWordsNoStop(document);
101
            const double inv_word_count = 1.0 / words.size();
102
            for (const string& word : words) {
103
                word_to_document_freqs_[word][document_id] += inv_word_count;
104
            }
105
            documents_.emplace(document_id, DocumentData{ComputeAverageRating(ratings), status});
106
            document_ids.push_back(document_id);
107
            return true;
108
        }
109
    }
110
111
    template <typename DocumentPredicate>
112
   [[nodiscard]] bool FindTopDocuments(const string& raw_query,
113
                                      DocumentPredicate document_predicate, vector<Document>& result) const {
114
        if(IsValidQuery(raw_query)==false){
115
            return false;
116
        }else{
117
            const Query query = ParseQuery(raw_query);
118
            auto matched_documents = FindAllDocuments(query, document_predicate);
119
            sort(matched_documents.begin(), matched_documents.end(),
120
                 [](const Document& lhs, const Document& rhs) {
121
                     if (abs(lhs.relevance - rhs.relevance) < 1e-6) {
122
                         return lhs.rating > rhs.rating;
123
                     } else {
124
                         return lhs.relevance > rhs.relevance;
125
                     }
126
                 });
127
            if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
128
                matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
129
            }
130
            result = matched_documents;
131
            return true;
132
        }
133
    }
134
135
    [[nodiscard]] bool FindTopDocuments(const string& raw_query, DocumentStatus status, vector<Document>& result) const {
136
        if(IsValidQuery(raw_query)==false) {
137
            return false;
138
        }else{
139
            return FindTopDocuments(
140
            raw_query, [status](int document_id, DocumentStatus document_status, int rating) {
141
                return document_status == status;
142
            }, result);
143
        }
144
    }
145
146
    [[nodiscard]] bool FindTopDocuments(const string& raw_query, vector<Document>& result) const {
147
        if(IsValidQuery(raw_query)==false) {
148
            return false;
149
        }else{
150
            return FindTopDocuments(raw_query, DocumentStatus::ACTUAL, result);
151
        }
152
    }
153
154
    int GetDocumentCount() const {
155
        return documents_.size();
156
    }
157
158
    [[nodiscard]] bool MatchDocument(const string& raw_query,
159
                                        int document_id, tuple<vector<string>, DocumentStatus>& result) const {
160
        if(IsValidQuery(raw_query)==false){
161
            return false;
162
        }else{
163
            const Query query = ParseQuery(raw_query);
164
            vector<string> matched_words;
165
            for (const string& word : query.plus_words) {
166
                if (word_to_document_freqs_.count(word) == 0) {
167
                    continue;
168
                }
169
                if (word_to_document_freqs_.at(word).count(document_id)) {
170
                    matched_words.push_back(word);
171
                }
172
            }
173
            for (const string& word : query.minus_words) {
174
                if (word_to_document_freqs_.count(word) == 0) {
175
                    continue;
176
                }
177
                if (word_to_document_freqs_.at(word).count(document_id)) {
178
                    matched_words.clear();
179
                    break;
180
                }
181
            }
182
            result = {matched_words, documents_.at(document_id).status};
183
            return true;
184
        }
185
    }
186
    
187
    int GetDocumentId(const int index) const {
188
        if(index<0 || !(index<GetDocumentCount())){
189
            return SearchServer::INVALID_DOCUMENT_ID;
190
        }
191
        return document_ids[index];
192
    }
193
194
private:
195
    struct DocumentData {
196
        int rating;
197
        DocumentStatus status;
198
    };
199
    const set<string> stop_words_;
200
    map<string, map<int, double>> word_to_document_freqs_;
201
    map<int, DocumentData> documents_;
202
    vector<int> document_ids;
203
204
    bool IsStopWord(const string& word) const {
205
        return stop_words_.count(word) > 0;
206
    }
207
208
    vector<string> SplitIntoWordsNoStop(const string& text) const {
209
        vector<string> words;
210
        for (const string& word : SplitIntoWords(text)) {
211
            if (!IsStopWord(word)) {
212
                words.push_back(word);
213
            }
214
        }
215
        return words;
216
    }
217
218
    static int ComputeAverageRating(const vector<int>& ratings) {
219
        if (ratings.empty()) {
220
            return 0;
221
        }
222
        int rating_sum = 0;
223
        for (const int rating : ratings) {
224
            rating_sum += rating;
225
        }
226
        return rating_sum / static_cast<int>(ratings.size());
227
    }
228
229
    struct QueryWord {
230
        string data;
231
        bool is_minus;
232
        bool is_stop;
233
    };
234
235
    QueryWord ParseQueryWord(string text) const {
236
        bool is_minus = false;
237
        QueryWord result;
238
        // Word shouldn't be empty
239
        if (text[0] == '-') {
240
            is_minus = true;
241
            text = text.substr(1);
242
        }
243
        result = {text, is_minus, IsStopWord(text)};
244
        return result;
245
    }
246
247
    struct Query {
248
        set<string> plus_words;
249
        set<string> minus_words;
250
    };
251
252
    Query ParseQuery(const string& text) const {
253
        Query query;
254
        for (const string& word : SplitIntoWords(text)) {
255
            const QueryWord query_word = ParseQueryWord(word);
256
            if (!query_word.is_stop) {
257
                if (query_word.is_minus) {
258
                    query.minus_words.insert(query_word.data);
259
                } else {
260
                    query.plus_words.insert(query_word.data);
261
                }
262
            }
263
        }
264
        return query;
265
    }
266
267
    // Existence required
268
    double ComputeWordInverseDocumentFreq(const string& word) const {
269
        return log(GetDocumentCount() * 1.0 / word_to_document_freqs_.at(word).size());
270
    }
271
272
    template <typename DocumentPredicate>
273
    vector<Document> FindAllDocuments(const Query& query,
274
                                      DocumentPredicate document_predicate) const {
275
        map<int, double> document_to_relevance;
276
        for (const string& word : query.plus_words) {
277
            if (word_to_document_freqs_.count(word) == 0) {
278
                continue;
279
            }
280
            const double inverse_document_freq = ComputeWordInverseDocumentFreq(word);
281
            for (const auto [document_id, term_freq] : word_to_document_freqs_.at(word)) {
282
                const auto& document_data = documents_.at(document_id);
283
                if (document_predicate(document_id, document_data.status, document_data.rating)) {
284
                    document_to_relevance[document_id] += term_freq * inverse_document_freq;
285
                }
286
            }
287
        }
288
289
        for (const string& word : query.minus_words) {
290
            if (word_to_document_freqs_.count(word) == 0) {
291
                continue;
292
            }
293
            for (const auto [document_id, _] : word_to_document_freqs_.at(word)) {
294
                document_to_relevance.erase(document_id);
295
            }
296
        }
297
298
        vector<Document> matched_documents;
299
        for (const auto [document_id, relevance] : document_to_relevance) {
300
            matched_documents.push_back(
301
                {document_id, relevance, documents_.at(document_id).rating});
302
        }
303
        return matched_documents;
304
    }
305
    
306
    static bool IsValidWord(const string& word) {
307
        return none_of(word.begin(), word.end(), [](char c) {
308
            return c >= '\0' && c < ' ';
309
            });
310
    }
311
    
312
    static bool IsValidQuery(const string& raw_query) {
313
        if(IsValidWord(raw_query)==false) {
314
            return false;
315
        }
316
        for (int i = 0; i < raw_query.size(); ++i) {
317
            if (raw_query[i] == '-' || raw_query[raw_query.size()-1]=='-') {
318
                if (raw_query[i + 1] == '-' || raw_query[i + 1] == ' ') {
319
                    return false;
320
                }
321
            }
322
        } 
323
        return true;
324
    }
325
};
326
327
// ==================== для примера =========================
328
329
void PrintDocument(const Document& document) {
330
    cout << "{ "s
331
         << "document_id = "s << document.id << ", "s
332
         << "relevance = "s << document.relevance << ", "s
333
         << "rating = "s << document.rating << " }"s << endl;
334
}
335
int main() {
336
    SearchServer search_server("и в на"s);
337
    // Явно игнорируем результат метода AddDocument, чтобы избежать предупреждения
338
    // о неиспользуемом результате его вызова
339
    (void) search_server.AddDocument(1, "пушистый кот пушистый хвост"s, DocumentStatus::ACTUAL, {7, 2, 7});
340
    if (!search_server.AddDocument(1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, {1, 2})) {
341
        cout << "Документ не был добавлен, так как его id совпадает с уже имеющимся"s << endl;
342
    }
343
    if (!search_server.AddDocument(-1, "пушистый пёс и модный ошейник"s, DocumentStatus::ACTUAL, {1, 2})) {
344
        cout << "Документ не был добавлен, так как его id отрицательный"s << endl;
345
    }
346
    if (!search_server.AddDocument(3, "большой пёс скво\x12рец"s, DocumentStatus::ACTUAL, {1, 3, 2})) {
347
        cout << "Документ не был добавлен, так как содержит спецсимволы"s << endl;
348
    }
349
    vector<Document> documents;
350
    if (search_server.FindTopDocuments("пушистый -"s, documents)) {
351
        for (const Document& document : documents) {
352
            PrintDocument(document);
353
        }
354
    } else {
355
        cout << "Ошибка в поисковом запросе"s << endl;
356
    }
357
}