Advertisement
VinnRonn

rating

May 30th, 2022
29
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.07 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <set>
  4. #include <map>
  5. #include <string>
  6. #include <utility>
  7. #include <vector>
  8. #include <cmath>
  9. #include <numeric>
  10.  
  11. using namespace std;
  12.  
  // Upper bound on the number of documents a single search returns.
  constexpr int MAX_RESULT_DOCUMENT_COUNT = 5;
  14.  
  // Reads one line from standard input and returns it without the trailing
  // newline. Yields an empty string when nothing could be read.
  std::string ReadLine() {
      std::string line;
      std::getline(std::cin, line);
      return line;
  }
  20.  
  21. int ReadLineWithNumber() {
  22. int result = 0;
  23. cin >> result;
  24. ReadLine();
  25. return result;
  26. }
  27.  
  // Splits `text` into the maximal runs of characters that contain no space
  // (' '). Only the space character separates words; leading, trailing and
  // repeated spaces never produce empty words.
  std::vector<std::string> SplitIntoWords(const std::string& text) {
      std::vector<std::string> tokens;

      std::string::size_type begin = text.find_first_not_of(' ');
      while (begin != std::string::npos) {
          const std::string::size_type end = text.find(' ', begin);
          if (end == std::string::npos) {
              // Last word: it runs to the end of the text.
              tokens.push_back(text.substr(begin));
              break;
          }
          tokens.push_back(text.substr(begin, end - begin));
          begin = text.find_first_not_of(' ', end);
      }

      return tokens;
  }
  48.  
  // One search hit.
  //
  // Every member has a default so that a default-constructed Document holds
  // zeros rather than indeterminate values. The type remains an aggregate, so
  // `{id, relevance}` and `{id, relevance, rating}` keep working.
  struct Document {
      int id = 0;              // identifier the document was added under
      double relevance = 0.0;  // relevance score assigned by the search
      int rating = 0;          // rating associated with the document
  };
  54.  
  55.  
  56.  
  57. class SearchServer {
  58. public:
  59. void SetStopWords(const string& text) {
  60. for (const string& word : SplitIntoWords(text)) {
  61. stop_words_.insert(word);
  62. }
  63. }
  64.  
  65. void AddDocument(int document_id, const string& document) {
  66. const vector<string> words = SplitIntoWordsNoStop(document);
  67.  
  68. vector<int> ratings;
  69.  
  70. int ratings_count = ReadLineWithNumber();
  71.  
  72. for (int count = 0; count < ratings_count; ++count) {
  73. ratings.push_back(ReadLineWithNumber());
  74. }
  75.  
  76. document_rating_[document_id] = ComputeAverageRating(ratings);
  77.  
  78. double count_word = 0.0;
  79.  
  80. for (const auto& w : words) {
  81. count_word = count(words.begin(), words.end(), w);
  82.  
  83. word_to_documents_[w].insert(document_id);
  84. word_to_document_freqs_[w].insert({ document_id, count_word / words.size() });
  85. }
  86. ++documents_count_;
  87. }
  88.  
  89. vector<Document> FindTopDocuments(const string& raw_query) const {
  90.  
  91. const Query query_words = ParseQuery(raw_query);
  92. auto matched_documents = FindAllDocuments(query_words);
  93.  
  94. sort(matched_documents.begin(), matched_documents.end(),
  95. [](const Document& lhs, const Document& rhs) {
  96. return lhs.relevance > rhs.relevance;
  97. });
  98. if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
  99. matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
  100. }
  101. return matched_documents;
  102. }
  103.  
  104. int ComputeAverageRating(const vector<int>& ratings) {
  105. int document_rating = accumulate(ratings.begin(), ratings.end(), 0);
  106. return document_rating / static_cast<int>(ratings.size());
  107. }
  108.  
  109. private:
  110.  
  111. struct Query {
  112.  
  113. set<string> plus_words;
  114. set<string> minus_words;
  115. };
  116.  
  117. map<string, set<int>> word_to_documents_;
  118. map<string, map<int, double>> word_to_document_freqs_;
  119.  
  120. set<string> stop_words_;
  121.  
  122. map<int, int> document_rating_;
  123.  
  124. int documents_count_ = 0;
  125.  
  126. bool IsStopWord(const string& word) const {
  127. return stop_words_.count(word) > 0;
  128. }
  129.  
  130. vector<string> SplitIntoWordsNoStop(const string& text) const {
  131. vector<string> words;
  132. for (const string& word : SplitIntoWords(text)) {
  133. if (!IsStopWord(word))
  134. {
  135. words.push_back(word);
  136. }
  137. }
  138. return words;
  139. }
  140.  
  141. Query ParseQuery(const string& text) const {
  142. Query query;
  143. vector<string> raw_words = SplitIntoWordsNoStop(text);
  144.  
  145. for (const string& word : raw_words) {
  146. if (word[0] == '-')
  147. {
  148. query.minus_words.insert(word.substr(1));
  149. }
  150. }
  151. for (const string& min : raw_words) {
  152. if (min[0] != '-' && !query.minus_words.count(min))
  153. {
  154. query.plus_words.insert(min);
  155. }
  156. }
  157. return query;
  158. }
  159.  
  160.  
  161. // IDF-TF
  162.  
  163. vector<Document> FindAllDocuments(const Query& query_words) const {
  164. vector<Document> matched_documents;
  165. map<int, double> document_to_relevance;
  166.  
  167. double IDF = 0.0;
  168.  
  169. //вынести в отдельную функцию учет plus слов
  170.  
  171. for (const auto& plus : query_words.plus_words) {
  172.  
  173. if (word_to_documents_.count(plus))
  174. {
  175. IDF = log(static_cast<double>(documents_count_) / word_to_documents_.at(plus).size());
  176. //cout << word_to_documents_.at(plus).size() << endl << IDF << endl;
  177.  
  178. for (const auto& id : word_to_documents_.at(plus)) {
  179.  
  180. document_to_relevance[id] += IDF * word_to_document_freqs_.at(plus).at(id);
  181. //cout << IDF * (1.0 / documents_size_.at(id)) << endl;
  182.  
  183. };
  184. }
  185. }
  186.  
  187. //вынести в отдельную функцию учет minus слов
  188.  
  189. for (const auto& minus : query_words.minus_words) {
  190.  
  191. if (word_to_documents_.count(minus)) {
  192. for (const auto& id : word_to_documents_.at(minus)) {
  193. document_to_relevance.erase(id);
  194. };
  195. }
  196. }
  197.  
  198. for (const auto& [id, relevance] : document_to_relevance) {
  199. matched_documents.push_back({ id, relevance });
  200. }
  201.  
  202. return matched_documents;
  203. }
  204.  
  205. };
  206.  
  207. SearchServer CreateSearchServer() {
  208. SearchServer search_server;
  209. search_server.SetStopWords(ReadLine());
  210.  
  211. const int document_count = ReadLineWithNumber();
  212.  
  213. for (int document_id = 0; document_id < document_count; ++document_id) {
  214. search_server.AddDocument(document_id, ReadLine());
  215. }
  216.  
  217. return search_server;
  218. }
  219.  
  220. int main() {
  221. const SearchServer search_server = CreateSearchServer();
  222.  
  223. const string query = ReadLine();
  224. for (const auto& [document_id, relevance] : search_server.FindTopDocuments(query)) {
  225. cout << "{ document_id = "s << document_id << ", " << "relevance = "s << relevance << " }"s << endl;
  226. }
  227.  
  228. return 0;
  229.  
  230. }
  231.  
  232.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement