Advertisement
12311k

Untitled

Mar 30th, 2021
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.53 KB | None | 0 0
  1. // final130.cpp
  2.  
  3. #include <algorithm>
  4. #include <cmath>
  5. #include <iostream>
  6. #include <map>
  7. #include <set>
  8. #include <string>
  9. #include <utility>
  10. #include <vector>
  11.  
  12. using namespace std;
  13.  
  14.  
  15. const int MAX_RESULT_DOCUMENT_COUNT = 5;
  16.  
  17. string ReadLine() {
  18. string s;
  19. getline(cin, s);
  20. return s;
  21. }
  22.  
  23. int ReadLineWithNumber() {
  24. int result;
  25. cin >> result;
  26. ReadLine();
  27. return result;
  28. }
  29.  
  30. vector<string> SplitIntoWords(const string& text) {
  31. vector<string> words;
  32. string word;
  33. for (const char c : text) {
  34. if (c == ' ') {
  35. words.push_back(word);
  36. word = "";
  37. } else {
  38. word += c;
  39. }
  40. }
  41. words.push_back(word);
  42.  
  43. return words;
  44. }
  45.  
  46. struct Document {
  47. int id;
  48. double relevance;
  49. };
  50.  
  51. class SearchServer {
  52. public:
  53. void SetStopWords(const string& text) {
  54. for (const string& word : SplitIntoWords(text)) {
  55. stop_words_.insert(word);
  56. }
  57. }
  58.  
  59. void AddDocument(int document_id, const string& document) {
  60. ++document_count_;
  61. const vector<string> words = SplitIntoWordsNoStop(document);
  62. const double inv_word_count = 1.0 / words.size();
  63. for (const string& word : words) {
  64. word_to_document_freqs_[word][document_id] += inv_word_count;
  65. }
  66. }
  67.  
  68. vector<Document> FindTopDocuments(const string& raw_query) const {
  69. const Query query = ParseQuery(raw_query);
  70. auto matched_documents = FindAllDocuments(query);
  71.  
  72. sort(matched_documents.begin(), matched_documents.end(),
  73. [](const Document& lhs, const Document& rhs) {
  74. return lhs.relevance > rhs.relevance;
  75. });
  76. if (matched_documents.size() > MAX_RESULT_DOCUMENT_COUNT) {
  77. matched_documents.resize(MAX_RESULT_DOCUMENT_COUNT);
  78. }
  79. return matched_documents;
  80. }
  81.  
  82. private:
  83. int document_count_ = 0;
  84. set<string> stop_words_;
  85. map<string, map<int, double>> word_to_document_freqs_;
  86.  
  87. bool IsStopWord(const string& word) const {
  88. return stop_words_.count(word) > 0;
  89. }
  90.  
  91. vector<string> SplitIntoWordsNoStop(const string& text) const {
  92. vector<string> words;
  93. for (const string& word : SplitIntoWords(text)) {
  94. if (!IsStopWord(word)) {
  95. words.push_back(word);
  96. }
  97. }
  98. return words;
  99. }
  100.  
  101. struct QueryWord {
  102. string data;
  103. bool is_minus;
  104. bool is_stop;
  105. };
  106.  
  107. QueryWord ParseQueryWord(string text) const {
  108. bool is_minus = false;
  109. // Word shouldn't be empty
  110. if (text[0] == '-') {
  111. is_minus = true;
  112. text = text.substr(1);
  113. }
  114. return {
  115. text,
  116. is_minus,
  117. IsStopWord(text)
  118. };
  119. }
  120.  
  121. struct Query {
  122. set<string> plus_words;
  123. set<string> minus_words;
  124. };
  125.  
  126. Query ParseQuery(const string& text) const {
  127. Query query;
  128. for (const string& word : SplitIntoWords(text)) {
  129. const QueryWord query_word = ParseQueryWord(word);
  130. if (!query_word.is_stop) {
  131. if (query_word.is_minus) {
  132. query.minus_words.insert(query_word.data);
  133. } else {
  134. query.plus_words.insert(query_word.data);
  135. }
  136. }
  137. }
  138. return query;
  139. }
  140.  
  141. // Existence required
  142. double ComputeWordInverseDocumentFreq(const string& word) const {
  143. return log(document_count_ * 1.0 / word_to_document_freqs_.at(word).size());
  144. }
  145.  
  146. vector<Document> FindAllDocuments(const Query& query) const {
  147. map<int, double> document_to_relevance;
  148. for (const string& word : query.plus_words) {
  149. if (word_to_document_freqs_.count(word) == 0) {
  150. continue;
  151. }
  152. const double inverse_document_freq = ComputeWordInverseDocumentFreq(word);
  153. for (const auto [document_id, term_freq] : word_to_document_freqs_.at(word)) {
  154. document_to_relevance[document_id] += term_freq * inverse_document_freq;
  155. }
  156. }
  157.  
  158. for (const string& word : query.minus_words) {
  159. if (word_to_document_freqs_.count(word) == 0) {
  160. continue;
  161. }
  162. for (const auto [document_id, _] : word_to_document_freqs_.at(word)) {
  163. document_to_relevance.erase(document_id);
  164. }
  165. }
  166.  
  167. vector<Document> matched_documents;
  168. for (const auto [document_id, relevance] : document_to_relevance) {
  169. matched_documents.push_back({document_id, relevance});
  170. }
  171. return matched_documents;
  172. }
  173. };
  174.  
  175.  
  176. SearchServer CreateSearchServer() {
  177. SearchServer search_server;
  178. search_server.SetStopWords(ReadLine());
  179.  
  180. const int document_count = ReadLineWithNumber();
  181. for (int document_id = 0; document_id < document_count; ++document_id) {
  182. search_server.AddDocument(document_id, ReadLine());
  183. }
  184.  
  185. return search_server;
  186. }
  187.  
  188.  
  189. int main() {
  190. const SearchServer search_server = CreateSearchServer();
  191.  
  192. const string query = ReadLine();
  193. for (auto [document_id, relevance] : search_server.FindTopDocuments(query)) {
  194. cout << "{ document_id = "s << document_id << ", "
  195. << "relevance = "s << relevance << " }"s
  196. << endl;
  197. }
  198.  
  199. return 0;
  200. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement