Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdexcept>
- #include <iostream>
- #include <vector>
- #include <fstream>
- #include <algorithm>
- #include <set>
- #include <array>
- #include <random>
- #include <time.h>
- size_t MEMORY = 8 * 10000;
- size_t int_size = 4;
- void SortChunks(std::istream& stream, size_t chunk_count, size_t chunk_elem_count,
- size_t all_elem_count, size_t sorting_index, size_t one_element_count) {
- std::vector<std::array<int32_t, 3>> chunk(chunk_elem_count);
- auto current_size = (int)all_elem_count;
- for (size_t i = 0; i < chunk_count; ++i) {
- int64_t min_element = std::min((int)chunk_elem_count, current_size);
- for (size_t j = 0; j < min_element; ++j) {
- for (size_t k = 0; k < one_element_count; ++k) {
- int32_t value;
- stream.read((char*) &value, int_size);
- chunk[j][k] = value;
- }
- }
- current_size -= chunk_elem_count;
- chunk.resize(min_element);
- auto sort_compare = [sorting_index](const std::array<int32_t, 3>& lhs, const std::array<int32_t, 3>& rhs) {
- return lhs[sorting_index] < rhs[sorting_index];
- };
- std::sort(chunk.begin(), chunk.end(), sort_compare);
- std::ofstream out_chunk("chunk" + std::to_string(i), std::ios::binary| std::ios::out);
- out_chunk.write((char*)&min_element, int_size);
- for (size_t j = 0; j < min_element; ++j) {
- for (size_t k = 0; k < one_element_count; ++k) {
- out_chunk.write((char *) &chunk[j][k], int_size);
- }
- }
- }
- }
- void MergeChunks(int64_t all_elem_count, size_t chunk_count,
- const std::string& outputFile, size_t sorting_index,
- size_t one_element_count) {
- std::ofstream out_chunk(outputFile, std::ios::binary| std::ios::out);
- out_chunk.write((char*)&all_elem_count, int_size);
- std::vector<std::ifstream> streams;
- std::vector<size_t> streams_size;
- std::vector<size_t> streams_counter;
- auto comparator = [sorting_index](const std::pair<std::array<int32_t, 3>, size_t>& first,
- const std::pair<std::array<int32_t, 3>, size_t>& second) {
- return first.first[sorting_index] < second.first[sorting_index];
- };
- std::set<std::pair<std::array<int32_t, 3>, size_t>,
- decltype(comparator)> for_merge(comparator);
- for (size_t i = 0; i < chunk_count; ++i) {
- streams.emplace_back("chunk" + std::to_string(i), std::ios::binary);
- int32_t chunk_size;
- streams.back().read((char*) &chunk_size, int_size);
- streams_size.push_back((size_t)chunk_size);
- std::array<int32_t, 3> chunk_values;
- for (size_t j = 0; j < one_element_count; ++j) {
- int32_t value;
- streams.back().read((char*) &value, int_size);
- chunk_values[j] = value;
- }
- for_merge.insert(std::make_pair(chunk_values, i));
- streams_counter.push_back(0);
- }
- for (size_t i = 0; i < all_elem_count; ++i) {
- auto min = for_merge.begin();
- auto current_value = min->first;
- auto chunk_id = min->second;
- for_merge.erase(min);
- ++streams_counter[chunk_id];
- for (size_t j = 0; j < one_element_count; ++j) {
- out_chunk.write((char*)¤t_value[j], int_size);
- }
- if (streams_size[chunk_id] > streams_counter[chunk_id]) {
- std::array<int32_t, 3> chunk_values;
- for (size_t j = 0; j < one_element_count; ++j) {
- int32_t value;
- streams[chunk_id].read((char*) &value, int_size);
- chunk_values[j] = value;
- }
- for_merge.insert(std::make_pair(chunk_values, chunk_id));
- }
- }
- }
- void ExternalSort(const std::string& inputFile, const std::string& outputFile,
- size_t one_element_count, size_t sorting_index) {
- std::ifstream data(inputFile, std::ios::binary);
- int32_t n;
- data.read((char*) &n, int_size);
- int32_t fileSize = n * 2 * int_size;
- size_t chunk_count = fileSize / MEMORY;
- if (fileSize % MEMORY) {
- ++chunk_count;
- }
- size_t chunk_elem_count;
- if (fileSize < MEMORY) {
- chunk_elem_count = n;
- } else {
- chunk_elem_count = MEMORY / (2 * int_size);
- }
- SortChunks(data, chunk_count, chunk_elem_count,
- n, sorting_index, one_element_count);
- MergeChunks(n, chunk_count, outputFile,
- sorting_index, one_element_count);
- }
- void GenFromVector(const std::vector<int32_t>& numbers, const std::string& fileName) {
- std::ofstream outfile(fileName, std::ios::binary| std::ios::out);
- int32_t n = numbers.size() / 2;
- outfile.write((char*)& n, int_size);
- for(size_t i = 0; i < numbers.size(); ++i) {
- outfile.write((char*)&numbers[i], int_size);
- }
- outfile.close();
- }
- void EvaluateRandomMarks(const std::string& input, const std::string& output) {
- ExternalSort(input, "sorted_0_" + input, 2, 0);
- std::ifstream infile("sorted_0_" + input, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- infile.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- std::mt19937 rng;
- rng.seed(std::random_device()());
- std::uniform_int_distribution<std::mt19937::result_type> dist(0, 1);
- for (size_t i = 0; i < n; ++i) {
- int32_t value1, value2;
- infile.read((char*) &value1, int_size);
- infile.read((char*) &value2, int_size);
- int32_t random_mark = (int32_t)dist(rng);
- if (i == 0) {
- random_mark = 1;
- }
- outfile.write((char *) &value1, int_size);
- outfile.write((char *) &random_mark, int_size);
- }
- }
- void JoinMarks1(const std::string& input,
- const std::string& marks,
- const std::string& output,
- bool need_3) {
- std::ifstream infile(input, std::ios::binary);
- std::ifstream marks_infile(marks, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- infile.read((char*) &n, int_size);
- marks_infile.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- infile.read((char*) &x1, int_size);
- infile.read((char*) &x2, int_size);
- int32_t y1, y2;
- marks_infile.read((char*) &y1, int_size);
- marks_infile.read((char*) &y2, int_size);
- outfile.write((char *) &x1, int_size);
- if (need_3) {
- outfile.write((char *) &x2, int_size);
- }
- outfile.write((char *) &y2, int_size);
- }
- }
- bool JoinMarks2(const std::string& input, const std::string& marks, const std::string& output) {
- std::ifstream infile(input, std::ios::binary);
- std::ifstream marks_infile(marks, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- infile.read((char*) &n, int_size);
- marks_infile.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- size_t counter = 0;
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- infile.read((char*) &x1, int_size);
- infile.read((char*) &x2, int_size);
- int32_t y1, y2;
- marks_infile.read((char*) &y1, int_size);
- marks_infile.read((char*) &y2, int_size);
- outfile.write((char *) &x1, int_size);
- int32_t mark_for_write;
- if (x2 == 0 && y2 == 1) {
- mark_for_write = 0;
- counter++;
- } else {
- mark_for_write = 1;
- }
- outfile.write((char *) &mark_for_write, int_size);
- }
- return (counter != 0);
- }
- size_t JoinMarks3(const std::string& in_current_marks,
- const std::string& in_next_marks,
- const std::string& output) {
- std::ifstream current_marks(in_current_marks, std::ios::binary);
- std::ifstream next_marks(in_next_marks, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- current_marks.read((char*) &n, int_size);
- next_marks.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- size_t size_counter = 0;
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- current_marks.read((char*) &x1, int_size);
- current_marks.read((char*) &x2, int_size);
- int32_t y1, y2;
- next_marks.read((char*) &y1, int_size);
- next_marks.read((char*) &y2, int_size);
- if (x2 == 1) {
- ++size_counter;
- }
- int32_t mark_for_write = x2 + 2 * y2;
- outfile.write((char *) &x1, int_size);
- outfile.write((char *) &mark_for_write, int_size);
- }
- return size_counter;
- }
- void JoinMarks4(const std::string& in_next_next,
- const std::string& in_marks,
- const std::string& output,
- int32_t size_new_list) {
- std::ifstream next_next(in_next_next, std::ios::binary);
- std::ifstream marks(in_marks, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- next_next.read((char*) &n, int_size);
- marks.read((char*) &n, int_size);
- outfile.write((char *) &size_new_list, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2, x3;
- next_next.read((char*) &x1, int_size);
- next_next.read((char*) &x2, int_size);
- next_next.read((char*) &x3, int_size);
- int32_t y1, y2;
- marks.read((char*) &y1, int_size);
- marks.read((char*) &y2, int_size);
- if (y2 % 2 == 1) {
- int32_t next_id;
- next_id = (y2 == 1) ? x3 : x2;
- outfile.write((char *) &x1, int_size);
- outfile.write((char *) &next_id, int_size);
- }
- }
- }
- bool FixRandomMarks(const std::string& input, const std::string& random_marks, const std::string& output) {
- ExternalSort(input, "sorted_1_" + input, 2, 1);
- JoinMarks1("sorted_1_" + input, random_marks, "joined_next_" + random_marks, false);
- ExternalSort("joined_next_" + random_marks, "sorted_joined_next_" + random_marks, 2, 0);
- return JoinMarks2(random_marks, "sorted_joined_next_" + random_marks, output);
- }
- std::string EvaluateMarks(const std::string& input, int recursive_depth) {
- std::string random_marks_file_name = "random_marks_" + std::to_string(recursive_depth);
- std::string fixed_marks_file_name = "marks_" + std::to_string(recursive_depth);
- bool is_changed = false;
- while (!is_changed) {
- EvaluateRandomMarks(input, random_marks_file_name);
- is_changed = FixRandomMarks(input, random_marks_file_name, fixed_marks_file_name);
- }
- return fixed_marks_file_name;
- }
- size_t JoinCurrentNextMarks(const std::string& input,
- const std::string& marks_file_name,
- const std::string& output) {
- JoinMarks1("sorted_1_" + input, marks_file_name, "joined_next_" + marks_file_name, false);
- ExternalSort("joined_next_" + marks_file_name, "sorted_joined_next_" + marks_file_name, 2, 0);
- size_t size_new_list = JoinMarks3(marks_file_name, "sorted_joined_next_" + marks_file_name, output);
- return size_new_list;
- }
- void JoinNextNext(const std::string& input, const std::string& output) {
- JoinMarks1("sorted_1_" + input, "sorted_0_" + input, output, true);
- }
- void JoinNewList(const std::string& current_next_marks,
- const std::string& next_next,
- const std::string& output,
- size_t size_new_list) {
- ExternalSort(next_next, "sorted_0_" + next_next, 3, 0);
- JoinMarks4("sorted_0_" + next_next, current_next_marks, output, size_new_list);
- }
- std::string EvaluateNewList(const std::string& input,
- const std::string& marks_file_name,
- int recursive_depth,
- size_t& size_new_list) {
- std::string current_next_marks = "current_next_" + marks_file_name;
- std::string next_next = "next_next_" + std::to_string(recursive_depth);
- std::string new_list = "new_list_" + std::to_string(recursive_depth);
- size_t size_new = JoinCurrentNextMarks(input, marks_file_name, current_next_marks);
- size_new_list = size_new;
- JoinNextNext(input, next_next);
- JoinNewList(current_next_marks, next_next, new_list, size_new_list);
- return new_list;
- }
- void Join5(const std::string& current_weight_file_name,
- const std::string& marks_file_name,
- const std::string& output,
- size_t size_new_list) {
- std::ifstream current_weight("sorted_" + current_weight_file_name, std::ios::binary);
- std::ifstream next_weight("sorted_joined_next_" + current_weight_file_name, std::ios::binary);
- std::ifstream marks(marks_file_name, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- current_weight.read((char*) &n, int_size);
- next_weight.read((char*) &n, int_size);
- marks.read((char*) &n, int_size);
- outfile.write((char *) &size_new_list, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- current_weight.read((char*) &x1, int_size);
- current_weight.read((char*) &x2, int_size);
- int32_t z1, z2;
- next_weight.read((char*) &z1, int_size);
- next_weight.read((char*) &z2, int_size);
- int32_t y1, y2;
- marks.read((char*) &y1, int_size);
- marks.read((char*) &y2, int_size);
- if (y2 % 2 == 1) {
- int32_t new_weight;
- new_weight = (y2 == 1) ? (x2 + z2) : x2;
- outfile.write((char *) &x1, int_size);
- outfile.write((char *) &new_weight, int_size);
- }
- }
- }
- void Join6(const std::string& input,
- const std::string& result_file_name,
- const std::string& marks_file_name,
- const std::string& weight_file_name,
- const std::string& output) {
- std::ifstream sort_input(input, std::ios::binary);
- std::ifstream marks(marks_file_name, std::ios::binary);
- std::ifstream weigth(weight_file_name, std::ios::binary);
- std::ifstream result(result_file_name, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- sort_input.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- marks.read((char*) &n, int_size);
- result.read((char*) &n, int_size);
- weigth.read((char*) &n, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- sort_input.read((char*) &x1, int_size);
- sort_input.read((char*) &x2, int_size);
- int32_t z1, z2;
- marks.read((char*) &z1, int_size);
- marks.read((char*) &z2, int_size);
- int32_t w1, w2;
- weigth.read((char*) &w1, int_size);
- weigth.read((char*) &w2, int_size);
- if (z2 % 2 == 1) {
- int32_t y1, y2;
- result.read((char*) &y1, int_size);
- result.read((char*) &y2, int_size);
- outfile.write((char *) &y1, int_size);
- outfile.write((char *) &y2, int_size);
- if (z2 == 1) {
- int32_t s = y2 + w2;
- outfile.write((char *) &x2, int_size);
- outfile.write((char *) &s, int_size);
- }
- }
- }
- }
- std::string EvaluateWeight(const std::string& input,
- const std::string& marks_file_name,
- int recursive_depth,
- size_t size_new_list) {
- std::string next_weight = "weight_" + std::to_string(recursive_depth + 1);
- std::string current_weight = "weight_" + std::to_string(recursive_depth);
- ExternalSort(current_weight, "sorted_" + current_weight, 2, 0);
- JoinMarks1("sorted_1_" + input, "sorted_" + current_weight, "joined_next_" + current_weight, false);
- ExternalSort("joined_next_" + current_weight, "sorted_joined_next_" + current_weight, 2, 0);
- Join5(current_weight,
- "current_next_" + marks_file_name,
- next_weight,
- size_new_list);
- return next_weight;
- }
- int32_t GetListSize(const std::string& input) {
- std::ifstream list(input, std::ios::binary);
- int32_t n;
- list.read((char*) &n, int_size);
- return n;
- }
- void EvaluateRealRank(const std::string& input, const std::string& marks_file_name, int recursive_depth) {
- ExternalSort("result_" + std::to_string(recursive_depth + 1),
- "sorted_result_" + std::to_string(recursive_depth + 1), 2, 0);
- Join6("sorted_0_" + input,
- "sorted_result_" + std::to_string(recursive_depth + 1),
- "current_next_" + marks_file_name,
- "weight_" + std::to_string(recursive_depth),
- "result_" + std::to_string(recursive_depth));
- }
- void CreateSimpleList(int recursive_depth) {
- std::ifstream input("weight_" + std::to_string(recursive_depth), std::ios::binary);
- std::ofstream outfile("result_" + std::to_string(recursive_depth), std::ios::binary);
- int32_t n;
- input.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- int32_t x1, x2, zero = 0;
- input.read((char*) &x1, int_size);
- input.read((char*) &x2, int_size);
- outfile.write((char *) &x1, int_size);
- outfile.write((char*) &zero, int_size);
- int32_t y1, y2;
- input.read((char*) &y1, int_size);
- input.read((char*) &y2, int_size);
- outfile.write((char *) &y1, int_size);
- outfile.write((char*) &x2, int_size);
- }
- void EvaluateRanks(const std::string& input, int recursive_depth) {
- auto list_size = GetListSize(input);
- if (list_size <= 2) {
- CreateSimpleList(recursive_depth);
- return;
- }
- size_t size_new_list;
- auto marks_file_name = EvaluateMarks(input, recursive_depth);
- auto new_list_file_name = EvaluateNewList(input, marks_file_name, recursive_depth, size_new_list);
- auto weight_file_name = EvaluateWeight(input, marks_file_name, recursive_depth, size_new_list);
- EvaluateRanks(new_list_file_name, recursive_depth + 1);
- EvaluateRealRank(input, marks_file_name, recursive_depth);
- }
- void GenerateStartingWeight(const std::string& input) {
- std::ifstream infile(input, std::ios::binary);
- std::ofstream outfile("weight_0", std::ios::binary);
- int32_t n;
- infile.read((char*) &n, int_size);
- outfile.write((char *) &n, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- infile.read((char*) &x1, int_size);
- infile.read((char*) &x2, int_size);
- int32_t weight = 1;
- outfile.write((char *) &x1, int_size);
- outfile.write((char *) &weight, int_size);
- }
- }
- void PostProcess(const std::string& input, const std::string& output) {
- ExternalSort(input, "sorted_" + input, 2, 1);
- std::ifstream infile("sorted_" + input, std::ios::binary);
- std::ofstream outfile(output, std::ios::binary);
- int32_t n;
- infile.read((char*) &n, int_size);
- for (size_t i = 0; i < n; ++i) {
- int32_t x1, x2;
- infile.read((char*) &x1, int_size);
- infile.read((char*) &x2, int_size);
- outfile.write((char *) &x1, int_size);
- }
- }
- void ListRanking(const std::string& input, const std::string& output) {
- GenerateStartingWeight(input);
- EvaluateRanks(input, 0);
- PostProcess("result_0", "output.bin");
- }
- std::vector<int32_t> GenVector(int n) {
- std::vector<int32_t> result;
- for (int i = n; i > 0; --i) {
- result.push_back(i);
- result.push_back(i - 1);
- }
- result.push_back(0);
- result.push_back(n);
- return result;
- }
- int main() {
- // std::vector<int32_t> nums = {4, 5, 5, 1, 1, 2, 3, 4, 2, 3, 7, 6, 8, 9, 11, 12,21 ,19, 45, 40};
- // std::vector<int32_t> nums = {4, 5, 5, 1, 1, 2, 3, 4, 2, 3};
- // std::vector<int32_t> nums = {3, 1, 1, 5, 5, 10, 10, 0,0, 3};
- GenFromVector(GenVector(500000),"input.bin");
- clock_t tStart = clock();
- ListRanking("input.bin", "output.bin");
- std::cout << (double)(clock() - tStart) / CLOCKS_PER_SEC;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement