Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //search_server.h
- #pragma once
- #include <istream>
- #include <ostream>
- #include <set>
- #include <list>
- #include <vector>
- #include <map>
- #include <string>
- using namespace std;
// Word -> document-id index together with the original document texts.
// A document's id is its position in the order in which it was added.
class InvertedIndex {
public:
  // Stores `document` and records its id under every word it contains.
  void Add(const std::string& document);

  // Returns a copy of the ids recorded for `word`; empty when the word is
  // unknown.
  std::vector<std::size_t> Lookup(const std::string& word) const;

  // Returns the stored text of document `id`. No bounds check is performed.
  const std::string& GetDocument(std::size_t id) const { return docs[id]; }

private:
  // For each word, the ids of the documents in which it was seen.
  std::map<std::string, std::vector<std::size_t>> index;
  // Document texts, indexed by document id.
  std::vector<std::string> docs;
};
- class SearchServer {
- public:
- SearchServer() = default;
- explicit SearchServer(istream& document_input);
- void UpdateDocumentBase(istream& document_input);
- void AddQueriesStream(istream& query_input, ostream& search_results_output);
- private:
- InvertedIndex index;
- };
- ////////////////////////////////////////////////////////////////////////////////////////////////////////////////
- //search_server.cpp
- #include "search_server.h"
- #include "iterator_range.h"
- #include "duration.h"
- #include <algorithm>
- #include <iterator>
- #include <sstream>
- #include <iostream>
// Splits `line` on whitespace and returns the words in their original order.
// Leading, trailing and repeated whitespace produce no empty words.
std::vector<std::string> SplitIntoWords(const std::string& line) {
  std::vector<std::string> words;
  std::istringstream stream(line);
  for (std::string word; stream >> word; ) {
    words.push_back(std::move(word));
  }
  return words;
}
- SearchServer::SearchServer(istream& document_input) {
- UpdateDocumentBase(document_input);
- }
- void SearchServer::UpdateDocumentBase(istream& document_input) {
- InvertedIndex new_index;
- for (string current_document; getline(document_input, current_document); ) {
- new_index.Add(move(current_document));
- }
- index = move(new_index);
- }
- auto SplitIntoWordsDura (string& s, TotalDuration& t) {
- ADD_DURATION(t);
- return SplitIntoWords(s);
- }
- void SearchServer::AddQueriesStream(
- istream& query_input, ostream& search_results_output
- ) {
- TotalDuration read("Total read");
- TotalDuration split("Total split");
- TotalDuration lookup("Total lookup");
- TotalDuration speed_sort("Total work sort");
- TotalDuration form_res("Forming result");
- TotalDuration fill_vec_pair("Total fill vect_pair");
- for (string current_query; getline(query_input, current_query); ) {
- const auto words = SplitIntoWordsDura(current_query,split);
- vector<size_t> docid_count(50'000, 0);
- {ADD_DURATION(lookup);
- for (const auto& word : words) {
- for (const size_t docid : index.Lookup(word)) {
- docid_count[docid]++;
- }
- }
- }
- vector<pair<size_t, size_t>> search_results;
- search_results.reserve(55'005);
- {
- ADD_DURATION(fill_vec_pair);
- for (size_t i = 0;i < 50'000; i++) {
- if (docid_count[i] > 0) {
- search_results.push_back({i, docid_count[i]});
- }
- }
- }
- {
- ADD_DURATION(speed_sort);
- sort(
- search_results.begin(),
- // search_results.begin() +5,
- search_results.end(),
- [](const pair<size_t, size_t>& lhs,const pair<size_t, size_t>& rhs) {
- int64_t lhs_docid = lhs.first;
- auto lhs_hit_count = lhs.second;
- int64_t rhs_docid = rhs.first;
- auto rhs_hit_count = rhs.second;
- return make_pair(lhs_hit_count, -lhs_docid) > make_pair(rhs_hit_count, -rhs_docid);
- }
- );
- }
- {
- ADD_DURATION(form_res);
- search_results_output << current_query << ':';
- for (auto [docid, hitcount] : Head(search_results, 5)) {
- search_results_output << " {"
- << "docid: " << docid << ", "
- << "hitcount: " << hitcount << '}';
- }
- search_results_output << '\n';
- }
- }
- }
- void InvertedIndex::Add(const string& document) {
- docs.push_back(document);
- const size_t docid = docs.size() - 1;
- for (const auto& word : SplitIntoWords(document)) {
- index[word].push_back(docid);
- }
- }
- vector<size_t> InvertedIndex::Lookup(const string& word) const {
- if (auto it = index.find(word); it != index.end()) {
- return it->second;
- } else {
- return {};
- }
- }
- //main.cpp
- #include "search_server.h"
- #include "parse.h"
- #include "test_runner.h"
- #include "duration.h"
- #include <algorithm>
- #include <iterator>
- #include <map>
- #include <vector>
- #include <string>
- #include <sstream>
- #include <fstream>
- #include <random>
- #include <thread>
- using namespace std;
- void TestSpeed ()
- {
- ifstream documents("docum_50000_50.txt");
- ifstream queries("queries_50000_10.txt");
- SearchServer srv;
- LOG_DURATION ("add data in srv speed") {
- srv.UpdateDocumentBase(documents);
- }
- ostringstream queries_output;
- LOG_DURATION ("search speed") {
- srv.AddQueriesStream(queries, queries_output);
- }
- }
- void TestFunctionality(
- const vector<string>& docs,
- const vector<string>& queries,
- const vector<string>& expected
- ) {
- istringstream docs_input(Join('\n', docs));
- istringstream queries_input(Join('\n', queries));
- SearchServer srv;
- srv.UpdateDocumentBase(docs_input);
- ostringstream queries_output;
- srv.AddQueriesStream(queries_input, queries_output);
- const string result = queries_output.str();
- const auto lines = SplitBy(Strip(result), '\n');
- ASSERT_EQUAL(lines.size(), expected.size());
- for (size_t i = 0; i < lines.size(); ++i) {
- ASSERT_EQUAL(lines[i], expected[i]);
- }
- }
- void TestSerpFormat() {
- const vector<string> docs = {
- "london is the capital of great britain",
- "i am travelling down the river"
- };
- const vector<string> queries = {"london", "the"};
- const vector<string> expected = {
- "london: {docid: 0, hitcount: 1}",
- Join(' ', vector{
- "the:",
- "{docid: 0, hitcount: 1}",
- "{docid: 1, hitcount: 1}",
- })
- };
- TestFunctionality(docs, queries, expected);
- }
- void TestTop5() {
- const vector<string> docs = {
- "milk a",
- "milk b",
- "milk c",
- "milk d",
- "milk e",
- "milk f",
- "milk g",
- "water a",
- "water b",
- "fire and earth"
- };
- const vector<string> queries = {"milk", "water", "rock"};
- const vector<string> expected = {
- Join(' ', vector{
- "milk:",
- "{docid: 0, hitcount: 1}",
- "{docid: 1, hitcount: 1}",
- "{docid: 2, hitcount: 1}",
- "{docid: 3, hitcount: 1}",
- "{docid: 4, hitcount: 1}"
- }),
- Join(' ', vector{
- "water:",
- "{docid: 7, hitcount: 1}",
- "{docid: 8, hitcount: 1}",
- }),
- "rock:",
- };
- TestFunctionality(docs, queries, expected);
- }
- void TestHitcount() {
- const vector<string> docs = {
- "the river goes through the entire city there is a house near it",
- "the wall",
- "walle",
- "is is is is",
- };
- const vector<string> queries = {"the", "wall", "all", "is", "the is"};
- const vector<string> expected = {
- Join(' ', vector{
- "the:",
- "{docid: 0, hitcount: 2}",
- "{docid: 1, hitcount: 1}",
- }),
- "wall: {docid: 1, hitcount: 1}",
- "all:",
- Join(' ', vector{
- "is:",
- "{docid: 3, hitcount: 4}",
- "{docid: 0, hitcount: 1}",
- }),
- Join(' ', vector{
- "the is:",
- "{docid: 3, hitcount: 4}",
- "{docid: 0, hitcount: 3}",
- "{docid: 1, hitcount: 1}",
- }),
- };
- TestFunctionality(docs, queries, expected);
- }
- void TestRanking() {
- const vector<string> docs = {
- "london is the capital of great britain",
- "paris is the capital of france",
- "berlin is the capital of germany",
- "rome is the capital of italy",
- "madrid is the capital of spain",
- "lisboa is the capital of portugal",
- "bern is the capital of switzerland",
- "moscow is the capital of russia",
- "kiev is the capital of ukraine",
- "minsk is the capital of belarus",
- "astana is the capital of kazakhstan",
- "beijing is the capital of china",
- "tokyo is the capital of japan",
- "bangkok is the capital of thailand",
- "welcome to moscow the capital of russia the third rome",
- "amsterdam is the capital of netherlands",
- "helsinki is the capital of finland",
- "oslo is the capital of norway",
- "stockgolm is the capital of sweden",
- "riga is the capital of latvia",
- "tallin is the capital of estonia",
- "warsaw is the capital of poland",
- };
- const vector<string> queries = {"moscow is the capital of russia"};
- const vector<string> expected = {
- Join(' ', vector{
- "moscow is the capital of russia:",
- "{docid: 7, hitcount: 6}",
- "{docid: 14, hitcount: 6}",
- "{docid: 0, hitcount: 4}",
- "{docid: 1, hitcount: 4}",
- "{docid: 2, hitcount: 4}",
- })
- };
- TestFunctionality(docs, queries, expected);
- }
- void TestBasicSearch() {
- const vector<string> docs = {
- "we are ready to go",
- "come on everybody shake you hands",
- "i love this game",
- "just like exception safety is not about writing try catch everywhere in your code move semantics are not about typing double ampersand everywhere in your code",
- "daddy daddy daddy dad dad dad",
- "tell me the meaning of being lonely",
- "just keep track of it",
- "how hard could it be",
- "it is going to be legen wait for it dary legendary",
- "we dont need no education"
- };
- const vector<string> queries = {
- "we need some help",
- "it",
- "i love this game",
- "tell me why",
- "dislike",
- "about"
- };
- const vector<string> expected = {
- Join(' ', vector{
- "we need some help:",
- "{docid: 9, hitcount: 2}",
- "{docid: 0, hitcount: 1}"
- }),
- Join(' ', vector{
- "it:",
- "{docid: 8, hitcount: 2}",
- "{docid: 6, hitcount: 1}",
- "{docid: 7, hitcount: 1}",
- }),
- "i love this game: {docid: 2, hitcount: 4}",
- "tell me why: {docid: 5, hitcount: 2}",
- "dislike:",
- "about: {docid: 3, hitcount: 2}",
- };
- TestFunctionality(docs, queries, expected);
- }
- int main() {
- TestRunner tr;
- RUN_TEST(tr, TestSerpFormat);
- RUN_TEST(tr, TestTop5);
- RUN_TEST(tr, TestHitcount);
- RUN_TEST(tr, TestRanking);
- RUN_TEST(tr, TestBasicSearch);
- TestSpeed();
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- //duration.h
- #pragma once
- #include "profile.h"
- #include <chrono>
- #include <iostream>
- #include <sstream>
- using namespace std;
- using namespace chrono;
// A labelled time accumulator. `value` is increased from the outside (see
// AddDuration); when the object is destroyed it prints
// "<message><milliseconds> ms" followed by a newline to stderr.
struct TotalDuration {
  // Label printed in front of the total; the constructor appends ": ".
  std::string message;
  // Time accumulated so far.
  std::chrono::steady_clock::duration value;

  explicit TotalDuration(const std::string& msg = "")
      : message(msg + ": "),
        value(std::chrono::steady_clock::duration::zero()) {}

  ~TotalDuration() {
    const auto total_ms =
        std::chrono::duration_cast<std::chrono::milliseconds>(value).count();

    // The report is assembled first and handed to cerr in a single write.
    std::ostringstream report;
    report << message << total_ms << " ms" << '\n';
    std::cerr << report.str();
  }
};
- class AddDuration {
- public:
- explicit AddDuration(steady_clock::duration& dest)
- : add_to(dest)
- , start(steady_clock::now())
- { }
- explicit AddDuration(TotalDuration& dest)
- : AddDuration(dest.value)
- { }
- ~AddDuration() {
- add_to += steady_clock::now() - start;
- }
- private:
- steady_clock::duration& add_to;
- steady_clock::time_point start;
- };
// Declares an AddDuration object bound to `value`; the time until the end of
// the enclosing scope is added to it. The trailing semicolon is part of the
// macro. UNIQ_ID is not defined in this file (presumably it comes from
// profile.h and builds a per-line variable name; confirm there).
#define ADD_DURATION(value) AddDuration UNIQ_ID(__LINE__){value};
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement