my_rating1

#include <algorithm>
#include <iostream>
#include <set>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include <cmath>

using namespace std;

const int MAX_RESULT_DOCUMENT_COUNT = 5;

string ReadLine() {
    string s;
    getline(cin, s);
    return s;
}

int ReadLineWithNumber() {
    int result = 0;
    cin >> result;
    ReadLine();
    return result;
}

vector<string> SplitIntoWords(const string& text) {

    vector<string> words;
    string word;
    for (const char c : text) {
        if (c == ' ') {
            if (!word.empty()) {
                words.push_back(word);
                word.clear();
            }
        } else {
            word += c;
        }
    }
    if (!word.empty()) {
        words.push_back(word);
    }

    return words;
}

struct Document {
    int id;
    double relevance;
    int rating;
};

class SearchServer {
public:
    void SetStopWords(const string& text) {

        for (const string& word : SplitIntoWords(text)) {
            stop_words_.insert(word);
        }
    }

    void AddDocument(int document_id, const string& document, const vector<int> ratings) {

        // наполняем счетчик документов -- он пригодится для подсчета IDF.
        ++document_count_;

        const vector<string> words = SplitIntoWordsNoStop(document);

        if (ratings.size() > 0 && ratings[0] > 0) {  // второе условие на случай если не просто пуста строка а строка с 0, типа 0 человек дали оценку документу
            documents_rating_[document_id] = ComputeAverageRating(ratings);
        } else {
            documents_rating_[document_id] = 0; // если нет оценок, по условию рэйтинг такого документа равен нулю
        }

        // Рассчитываем TF каждого слова в каждом документе.
        for (const string& word : words) {
            TF_[word][document_id] += 1.0 / words.size();
            index_[word].insert(document_id);
        }
    }

    vector<Document> FindTopDocuments(const string& raw_query) const {

        const PlusMinusWords query_words = ParseQuery(raw_query);

        auto matched_documents = FindAllDocuments(query_words);

        sort(matched_documents.begin(), matched_documents.end(),
             [](const Document& lhs, const Document& rhs) {
                 return lhs.relevance > rhs.relevance;
             });
        if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
            matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
        }
        return matched_documents;
    }

private:

    struct PlusMinusWords {
        set<string> plus_words;
        set<string> minus_words;
    };

    int document_count_ = 0;

    map<int, int> documents_rating_;

    map<string, map<int, double>> TF_;

    map<string, set<int>> index_;

    set<string> stop_words_;

    bool IsStopWord(const string& word) const {
        return stop_words_.count(word) > 0;
    }

    vector<string> SplitIntoWordsNoStop(const string& text) const {
        vector<string> words;
        for (const string& word : SplitIntoWords(text)) {
            if (!IsStopWord(word)) {
                words.push_back(word);
            }
        }
        return words;
    }

    PlusMinusWords ParseQuery(const string& text) const {

        PlusMinusWords query_words;

        for (const string& word : SplitIntoWordsNoStop(text)) {
            if (word[0] == '-') {
                query_words.minus_words.insert(word.substr(1, word.size()));
            } else {
                if (query_words.minus_words.count(word) == 0) {
                    query_words.plus_words.insert(word);
                }
            }
        }
        return query_words;
    }

    vector<Document> FindAllDocuments(const PlusMinusWords& query_words) const {

        /* Рассчитываем IDF каждого плюс-слова в запросе:
        1) количество документов document_count_ делим на количество документов, где это слово встречается;
        2) берем от полученного значения log.

        Функция AddDocument построила index_, где каждому слову отнесено множество документов, где оно встречается.
        */

        map<string, double> idf; // в результате получим слово из запроса и его посчитанный IDF (не факт, что все слова из запроса обрели IDF, ведь слова может не быть в индексе, а значит знаменателя нет).
        for (const string& word : query_words.plus_words) {
            if (index_.count(word) != 0) { // если слово есть в индексе, значит можно быстро понять, в скольких документах оно есть -- index_.at(word).size().
                idf[word] = log(static_cast<double>(document_count_) / index_.at(word).size());
            }
        }

        map<int, double> matched_documents;

        for (const auto& [word, idf] : idf) { // раз мы идем здесь по словарю idf, значит мы идем по плюс-словам запроса.
            if (TF_.count(word) != 0) { // если плюс-слово запроса есть в TF_, значит по TF_.at(плюс-слово запроса) мы получим все id документов, где это слово имеет вес tf, эти документы интересы.
                for (const auto& [document_id, tf] : TF_.at(word)) { // будем идти по предпосчитанному TF_.at(плюс-слово запроса) и наращивать релевантность документам по их id по офрмуле IDF-TF.
                    matched_documents[document_id] += idf * tf;
                }
            }
        }

        /* теперь надо пройтись по минус словам и посмотреть при помощи index_,
        какие id документов есть по этому слову, и вычеркнуть их из выдачи.
        */

        for (const string& word : query_words.minus_words) {
            if (index_.count(word) != 0) {
                for (const auto& documents_id : index_.at(word)) {
                    if (matched_documents.count(documents_id) != 0) {
                        matched_documents.erase(documents_id);
                    }

                }
            }
        }

        vector<Document> result;

        for (const auto& [document_id, relevance] : matched_documents) {
            result.push_back({document_id, relevance, documents_rating_.at(document_id)});
        }

        return result;
    }

    int ComputeAverageRating(const vector<int>& ratings) {
        int sum_rating = 0;
        for (int i = 1; i <= ratings[0]; ++i) {
            sum_rating += ratings[i];
        }

        return sum_rating / ratings[0];
    }
};

SearchServer CreateSearchServer() {

    SearchServer search_server;
    search_server.SetStopWords(ReadLine());

    const int document_count = ReadLineWithNumber();
    for (int document_id = 0; document_id < document_count; ++document_id) {
        string document = ReadLine();

        vector<string> ratings_line = SplitIntoWords(ReadLine());
        vector<int> ratings;

        if (ratings_line.size() > 0 && stoi(ratings_line[0]) > 0) {
            for (string num : ratings_line) {
                ratings.push_back(stoi(num));
            }
        }
        search_server.AddDocument(document_id, document, ratings);
    }

    return search_server;
}

int main() {

    const SearchServer search_server = CreateSearchServer();

    const string query = ReadLine();

    for (const auto& [document_id, relevance, rating] : search_server.FindTopDocuments(query)) {
        cout <<
        "{ document_id = "s << document_id << ", "
        << "relevance = "s << relevance << ", "s
        << "rating = "s << rating << " }" << endl;
    }
}
/*

тест1 -- обычный --- OK:

i v na
3
belyj kot i modnyj oshejnik
2 8 -3
pushistyj kot pushistyj hvost
3 7 2 7
uhozhennyj pes vyrazitelnye glaza
4 5 -12 2 1
pushistyj uhozhennyj kot

вывод:
{ document_id = 1, relevance = 0.650672, rating = 5 }
{ document_id = 2, relevance = 0.274653, rating = -1 }
{ document_id = 0, relevance = 0.101366, rating = 2 }

тест2 -- с минус-словом "кот", которое должно вычеркнуть 0 и 1 документы --- OK:

i v na
3
belyj kot i modnyj oshejnik
2 8 -3
pushistyj kot pushistyj hvost
3 7 2 7
uhozhennyj pes vyrazitelnye glaza
4 5 -12 2 1
pushistyj uhozhennyj -kot

вывод:
{ document_id = 2, relevance = 0.274653, rating = -1 }

тест3 -- без стоп-слов, релевантность нулевого документа должна измениться,
т.к. "kot" делится на большее количество слов; также есть нулевой рейтинг --- OK:

// пустую строку над тройкой тоже надо копировать, она символизирует отсутсвтие стоп-слов

3
belyj kot i modnyj oshejnik
2 8 -3
pushistyj kot pushistyj hvost

uhozhennyj pes vyrazitelnye glaza
4 5 -12 2 1
pushistyj uhozhennyj kot

вывод:
{ document_id = 1, relevance = 0.650672, rating = 0 }
{ document_id = 2, relevance = 0.274653, rating = -1 }
{ document_id = 0, relevance = 0.081093, rating = 2 }

*/