Advertisement
Guest User

Untitled

a guest
Jan 19th, 2020
176
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.80 KB | None | 0 0
  1. //search_server.h
  2. #pragma once
  3.  
  4. #include <istream>
  5. #include <ostream>
  6. #include <set>
  7. #include <list>
  8. #include <vector>
  9. #include <map>
  10. #include <string>
  11. using namespace std;
  12.  
  13. class InvertedIndex {
  14. public:
  15.   void Add(const string& document);
  16.   vector<size_t> Lookup(const string& word) const;
  17.  
  18.   const string& GetDocument(size_t id) const {
  19.     return docs[id];
  20.   }
  21.  
  22. private:
  23.   map<string, vector<size_t>> index;
  24.   vector<string> docs;
  25. };
  26.  
  27. class SearchServer {
  28. public:
  29.   SearchServer() = default;
  30.   explicit SearchServer(istream& document_input);
  31.   void UpdateDocumentBase(istream& document_input);
  32.   void AddQueriesStream(istream& query_input, ostream& search_results_output);
  33.  
  34. private:
  35.   InvertedIndex index;
  36. };
  37.  
  38.  
  39. ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  40. //search_server.cpp
  41.  
  42. #include "search_server.h"
  43. #include "iterator_range.h"
  44. #include "duration.h"
  45.  
  46. #include <algorithm>
  47. #include <iterator>
  48. #include <sstream>
  49. #include <iostream>
  50.  
  51.  
  52. vector<string> SplitIntoWords(const string& line) {
  53.   istringstream words_input(line);
  54.   return {istream_iterator<string>(words_input), istream_iterator<string>()};
  55. }
  56.  
  57. SearchServer::SearchServer(istream& document_input) {
  58.   UpdateDocumentBase(document_input);
  59. }
  60.  
  61. void SearchServer::UpdateDocumentBase(istream& document_input) {
  62.   InvertedIndex new_index;
  63.  
  64.   for (string current_document; getline(document_input, current_document); ) {
  65.     new_index.Add(move(current_document));
  66.   }
  67.  
  68.   index = move(new_index);
  69. }
  70.  
  71. auto SplitIntoWordsDura (string& s, TotalDuration& t) {
  72.     ADD_DURATION(t);
  73.     return SplitIntoWords(s);
  74. }
  75. void SearchServer::AddQueriesStream(
  76.   istream& query_input, ostream& search_results_output
  77. ) {
  78.     TotalDuration read("Total read");
  79.     TotalDuration split("Total split");
  80.     TotalDuration lookup("Total lookup");
  81.     TotalDuration speed_sort("Total work sort");
  82.     TotalDuration form_res("Forming result");
  83.     TotalDuration fill_vec_pair("Total fill vect_pair");
  84.  
  85.   for (string current_query; getline(query_input, current_query); ) {
  86.     const auto words = SplitIntoWordsDura(current_query,split);
  87.  
  88.     vector<size_t> docid_count(50'000, 0);
  89.         {ADD_DURATION(lookup);
  90.    for (const auto& word : words) {
  91.      for (const size_t docid : index.Lookup(word)) {
  92.        docid_count[docid]++;
  93.      }
  94.    }
  95.         }
  96.  
  97.    vector<pair<size_t, size_t>> search_results;
  98.         search_results.reserve(55'005);
  99.         {
  100.         ADD_DURATION(fill_vec_pair);
  101.         for (size_t i = 0;i < 50'000; i++) {
  102.             if (docid_count[i] > 0) {
  103.                 search_results.push_back({i, docid_count[i]});
  104.             }
  105.         }
  106.         }
  107.  
  108.         {
  109.         ADD_DURATION(speed_sort);
  110.                         sort(
  111.                             search_results.begin(),
  112.                         //  search_results.begin() +5,
  113.                             search_results.end(),
  114.                             [](const pair<size_t, size_t>& lhs,const pair<size_t, size_t>& rhs) {
  115.                                 int64_t lhs_docid = lhs.first;
  116.                                 auto lhs_hit_count = lhs.second;
  117.                                 int64_t rhs_docid = rhs.first;
  118.                                 auto rhs_hit_count = rhs.second;
  119.                                 return make_pair(lhs_hit_count, -lhs_docid) > make_pair(rhs_hit_count, -rhs_docid);
  120.                             }
  121.                         );
  122.         }
  123.         {
  124.         ADD_DURATION(form_res);
  125.    search_results_output << current_query << ':';
  126.    for (auto [docid, hitcount] : Head(search_results, 5)) {
  127.      search_results_output << " {"
  128.        << "docid: " << docid << ", "
  129.        << "hitcount: " << hitcount << '}';
  130.    }
  131.    search_results_output << '\n';
  132.         }
  133.  }
  134. }
  135.  
  136. void InvertedIndex::Add(const string& document) {
  137.  docs.push_back(document);
  138.  
  139.  const size_t docid = docs.size() - 1;
  140.  for (const auto& word : SplitIntoWords(document)) {
  141.    index[word].push_back(docid);
  142.  }
  143. }
  144.  
  145. vector<size_t> InvertedIndex::Lookup(const string& word) const {
  146.  if (auto it = index.find(word); it != index.end()) {
  147.    return it->second;
  148.  } else {
  149.    return {};
  150.  }
  151.  
  152. }
  153. //main.cpp
  154. #include "search_server.h"
  155. #include "parse.h"
  156. #include "test_runner.h"
  157. #include "duration.h"
  158.  
  159. #include <algorithm>
  160. #include <iterator>
  161. #include <map>
  162. #include <vector>
  163. #include <string>
  164. #include <sstream>
  165. #include <fstream>
  166. #include <random>
  167. #include <thread>
  168. using namespace std;
  169.  
  170.  
  171. void TestSpeed ()
  172. {
  173.     ifstream documents("docum_50000_50.txt");
  174.     ifstream queries("queries_50000_10.txt");
  175.  
  176.     SearchServer srv;
  177.     LOG_DURATION ("add data in srv speed") {
  178.         srv.UpdateDocumentBase(documents);
  179.     }
  180.     ostringstream queries_output;
  181.     LOG_DURATION ("search speed") {
  182.     srv.AddQueriesStream(queries, queries_output);
  183.     }
  184.    
  185. }
  186.  
  187. void TestFunctionality(
  188.  const vector<string>& docs,
  189.  const vector<string>& queries,
  190.  const vector<string>& expected
  191. ) {
  192.  istringstream docs_input(Join('\n', docs));
  193.  istringstream queries_input(Join('\n', queries));
  194.  
  195.  SearchServer srv;
  196.  srv.UpdateDocumentBase(docs_input);
  197.  ostringstream queries_output;
  198.  srv.AddQueriesStream(queries_input, queries_output);
  199.  
  200.  const string result = queries_output.str();
  201.  const auto lines = SplitBy(Strip(result), '\n');
  202.  ASSERT_EQUAL(lines.size(), expected.size());
  203.  for (size_t i = 0; i < lines.size(); ++i) {
  204.    ASSERT_EQUAL(lines[i], expected[i]);
  205.  }
  206. }
  207.  
  208. void TestSerpFormat() {
  209.  const vector<string> docs = {
  210.    "london is the capital of great britain",
  211.    "i am travelling down the river"
  212.  };
  213.  const vector<string> queries = {"london", "the"};
  214.  const vector<string> expected = {
  215.    "london: {docid: 0, hitcount: 1}",
  216.    Join(' ', vector{
  217.      "the:",
  218.      "{docid: 0, hitcount: 1}",
  219.      "{docid: 1, hitcount: 1}",
  220.    })
  221.  };
  222.  
  223.  TestFunctionality(docs, queries, expected);
  224. }
  225.  
  226. void TestTop5() {
  227.  const vector<string> docs = {
  228.    "milk a",
  229.    "milk b",
  230.    "milk c",
  231.    "milk d",
  232.    "milk e",
  233.    "milk f",
  234.    "milk g",
  235.    "water a",
  236.    "water b",
  237.    "fire and earth"
  238.  };
  239.  
  240.  const vector<string> queries = {"milk", "water", "rock"};
  241.  const vector<string> expected = {
  242.    Join(' ', vector{
  243.      "milk:",
  244.      "{docid: 0, hitcount: 1}",
  245.      "{docid: 1, hitcount: 1}",
  246.      "{docid: 2, hitcount: 1}",
  247.      "{docid: 3, hitcount: 1}",
  248.      "{docid: 4, hitcount: 1}"
  249.    }),
  250.    Join(' ', vector{
  251.      "water:",
  252.      "{docid: 7, hitcount: 1}",
  253.      "{docid: 8, hitcount: 1}",
  254.    }),
  255.    "rock:",
  256.  };
  257.  TestFunctionality(docs, queries, expected);
  258. }
  259.  
  260. void TestHitcount() {
  261.  const vector<string> docs = {
  262.    "the river goes through the entire city there is a house near it",
  263.    "the wall",
  264.    "walle",
  265.    "is is is is",
  266.  };
  267.  const vector<string> queries = {"the", "wall", "all", "is", "the is"};
  268.  const vector<string> expected = {
  269.    Join(' ', vector{
  270.      "the:",
  271.      "{docid: 0, hitcount: 2}",
  272.      "{docid: 1, hitcount: 1}",
  273.    }),
  274.    "wall: {docid: 1, hitcount: 1}",
  275.    "all:",
  276.    Join(' ', vector{
  277.      "is:",
  278.      "{docid: 3, hitcount: 4}",
  279.      "{docid: 0, hitcount: 1}",
  280.    }),
  281.    Join(' ', vector{
  282.      "the is:",
  283.      "{docid: 3, hitcount: 4}",
  284.      "{docid: 0, hitcount: 3}",
  285.      "{docid: 1, hitcount: 1}",
  286.    }),
  287.  };
  288.  TestFunctionality(docs, queries, expected);
  289. }
  290.  
  291. void TestRanking() {
  292.  const vector<string> docs = {
  293.    "london is the capital of great britain",
  294.    "paris is the capital of france",
  295.    "berlin is the capital of germany",
  296.    "rome is the capital of italy",
  297.    "madrid is the capital of spain",
  298.    "lisboa is the capital of portugal",
  299.    "bern is the capital of switzerland",
  300.    "moscow is the capital of russia",
  301.    "kiev is the capital of ukraine",
  302.    "minsk is the capital of belarus",
  303.    "astana is the capital of kazakhstan",
  304.    "beijing is the capital of china",
  305.    "tokyo is the capital of japan",
  306.    "bangkok is the capital of thailand",
  307.    "welcome to moscow the capital of russia the third rome",
  308.    "amsterdam is the capital of netherlands",
  309.    "helsinki is the capital of finland",
  310.    "oslo is the capital of norway",
  311.    "stockgolm is the capital of sweden",
  312.    "riga is the capital of latvia",
  313.    "tallin is the capital of estonia",
  314.    "warsaw is the capital of poland",
  315.  };
  316.  
  317.  const vector<string> queries = {"moscow is the capital of russia"};
  318.  const vector<string> expected = {
  319.    Join(' ', vector{
  320.      "moscow is the capital of russia:",
  321.      "{docid: 7, hitcount: 6}",
  322.      "{docid: 14, hitcount: 6}",
  323.      "{docid: 0, hitcount: 4}",
  324.      "{docid: 1, hitcount: 4}",
  325.      "{docid: 2, hitcount: 4}",
  326.    })
  327.  };
  328.  TestFunctionality(docs, queries, expected);
  329. }
  330.  
  331. void TestBasicSearch() {
  332.  const vector<string> docs = {
  333.    "we are ready to go",
  334.    "come on everybody shake you hands",
  335.    "i love this game",
  336.    "just like exception safety is not about writing try catch everywhere in your code move semantics are not about typing double ampersand everywhere in your code",
  337.    "daddy daddy daddy dad dad dad",
  338.    "tell me the meaning of being lonely",
  339.    "just keep track of it",
  340.    "how hard could it be",
  341.    "it is going to be legen wait for it dary legendary",
  342.    "we dont need no education"
  343.  };
  344.  
  345.  const vector<string> queries = {
  346.    "we need some help",
  347.    "it",
  348.    "i love this game",
  349.    "tell me why",
  350.    "dislike",
  351.    "about"
  352.  };
  353.  
  354.  const vector<string> expected = {
  355.    Join(' ', vector{
  356.      "we need some help:",
  357.      "{docid: 9, hitcount: 2}",
  358.      "{docid: 0, hitcount: 1}"
  359.    }),
  360.    Join(' ', vector{
  361.      "it:",
  362.      "{docid: 8, hitcount: 2}",
  363.      "{docid: 6, hitcount: 1}",
  364.      "{docid: 7, hitcount: 1}",
  365.    }),
  366.    "i love this game: {docid: 2, hitcount: 4}",
  367.    "tell me why: {docid: 5, hitcount: 2}",
  368.    "dislike:",
  369.    "about: {docid: 3, hitcount: 2}",
  370.  };
  371.  TestFunctionality(docs, queries, expected);
  372. }
  373.  
  374. int main() {
  375.  TestRunner tr;
  376.  RUN_TEST(tr, TestSerpFormat);
  377.  RUN_TEST(tr, TestTop5);
  378.  RUN_TEST(tr, TestHitcount);
  379.  RUN_TEST(tr, TestRanking);
  380.  RUN_TEST(tr, TestBasicSearch);
  381.     TestSpeed();
  382. }
  383. ////////////////////////////////////////////////////////////////////////////////////////////////////
  384. //diration.h
  385. #pragma once
  386. #include "profile.h"
  387.  
  388. #include <chrono>
  389. #include <iostream>
  390. #include <sstream>
  391. using namespace std;
  392. using namespace chrono;
  393. struct TotalDuration {
  394.     string message;
  395.     steady_clock::duration value;
  396.  
  397. explicit TotalDuration(const string& msg = "")  
  398.     : message(msg + ": ")  
  399.     , value(0)  
  400.     {  }
  401.  
  402. ~TotalDuration() {
  403.     ostringstream os;
  404.     os << message
  405.         << duration_cast<milliseconds>(value).count()
  406.         << " ms" << endl;
  407.         cerr << os.str();
  408.     }
  409. };
  410.  
  411. class AddDuration {
  412. public:
  413.     explicit AddDuration(steady_clock::duration& dest)
  414.     : add_to(dest)    
  415.     , start(steady_clock::now())  
  416.     {  }
  417.    
  418.     explicit AddDuration(TotalDuration& dest)  
  419.     : AddDuration(dest.value)
  420.     {  }
  421.    
  422.     ~AddDuration() {
  423.         add_to += steady_clock::now() - start;  
  424.     }
  425. private:
  426.     steady_clock::duration& add_to;
  427.     steady_clock::time_point start;
  428.  
  429. };
  430.  
  431. #define ADD_DURATION(value) \
  432.     AddDuration UNIQ_ID(__LINE__){value};
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement