Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <algorithm>
- #include <cstddef>
- #include <istream>
- #include <memory>
- #include <ostream>
- #include <string>
- #include <unordered_map>
- #include <vector>
- #include <queue>
- using namespace std;
- enum Error {
- OK = 0,
- WRONG_ARGUMENTS,
- OUTPUT_ERROR
- // You can add here yours errors.
- };
- struct Node {
- unique_ptr<Node> l_son;
- unique_ptr<Node> r_son;
- uint64_t freq;
- char ch;
- Node(char ch, uint64_t freq) : l_son(nullptr), r_son(nullptr), freq(freq), ch(ch) {}
- Node(Node* l, Node* r) : l_son(l), r_son(r), freq(l->freq + r->freq) {}
- bool operator<(const Node& rhs) const {
- return freq < rhs.freq;
- }
- bool is_leaf() const {
- return !l_son && !r_son;
- }
- };
- class CmpNodes {
- public:
- bool operator()(const Node* lhs, const Node* rhs) {
- return lhs->freq > rhs->freq;
- }
- };
- class Bohr {
- public:
- unique_ptr<Node> root;
- Bohr(const unordered_map<char, uint64_t>& freqs) {
- priority_queue<Node*, vector<Node*>, CmpNodes> pq;
- // inserting one-char trees
- for (const auto& [ch, freq] : freqs) {
- pq.push(new Node(ch, freq));
- }
- while (pq.size() > 1) {
- Node* l_node = pq.top();
- pq.pop();
- Node* r_node = pq.top();
- pq.pop();
- pq.push(new Node(l_node, r_node));
- }
- root.reset(pq.top());
- }
- unordered_map<char, string> get_codes() {
- unordered_map<char, string> codes;
- vector<char> cur_code;
- traversal(root.get(), cur_code, &codes);
- return codes;
- }
- void traversal(Node* cur_node, vector<char>& cur_code, unordered_map<char, string>* codes) {
- if (cur_node->is_leaf()) { // set code for char
- (*codes)[cur_node->ch] = string(cur_code.begin(), cur_code.end());
- }
- if (cur_node->l_son) {
- cur_code.push_back('0');
- traversal(cur_node->l_son.get(), cur_code, codes);
- cur_code.pop_back();
- }
- if (cur_node->r_son) {
- cur_code.push_back('1');
- traversal(cur_node->r_son.get(), cur_code, codes);
- cur_code.pop_back();
- }
- }
- };
- class Reader {
- private:
- static const size_t BUF_SIZE = 4096;
- char buf[BUF_SIZE];
- size_t buf_pos = 0;
- size_t chars_in_buf = 0;
- istream* in;
- size_t start_pos;
- void read_block() {
- in->read(buf, BUF_SIZE);
- chars_in_buf = in->gcount();
- buf_pos = 0;
- }
- public:
- Reader(istream* in) : in(in), start_pos(in->tellg()) {}
- // returns true if successfully read
- bool read(char* ch) {
- if (buf_pos == chars_in_buf) {
- read_block();
- }
- if (buf_pos == chars_in_buf) {
- return false;
- }
- *ch = buf[buf_pos++];
- return true;
- }
- void reset() {
- in->clear();
- in->seekg(start_pos);
- }
- };
- class Writer {
- private:
- static const size_t BUF_SIZE = 4096;
- char buf[BUF_SIZE];
- size_t buf_pos = 0;
- ostream* out;
- public:
- Writer(ostream* out) : out(out) {}
- void write_buf() {
- out->write(buf, buf_pos);
- buf_pos = 0;
- }
- void write_char(char ch) {
- buf[buf_pos++] = ch;
- if (buf_pos == BUF_SIZE) {
- write_buf();
- }
- }
- void write_bit(bool bit) {
- static char bits_written = 0;
- buf[buf_pos] <<= 1;
- buf[buf_pos] += bit;
- ++bits_written;
- if (bits_written == 8) {
- ++buf_pos;
- bits_written = 0;
- }
- }
- void flush() {
- if (buf_pos != 0) {
- write_buf();
- }
- }
- };
- // encodes and prints
- class Encoder {
- private:
- Reader* reader;
- Writer writer;
- uint8_t byte_num = 0; // номер текущего байта в выводе (используется только для первых <= 8)
- unordered_map<char, string> codes;
- // Порядок кодирования
- // В первых 8 байтах младший бит совпадает
- // В первом байте записана мощность алфавита
- // а, в остальных 7 закодирована длина последовательности в битах (хватит на ОООчень большой файл)
- // Остальные байты - закодированный файл, последний байт дополнен нулями, если нужно
- public:
- Encoder(Reader* reader, ostream* out, unordered_map<char, string>&& codes) : reader(reader), writer(out), codes(move(codes)) {}
- void encode_table() {
- vector<pair<char, string>> codes_sorted(codes.begin(), codes.end());
- sort(codes_sorted.begin(), codes_sorted.end());
- for (const auto& [ch, code] : codes) {
- for (char bit : code) {
- writer.write_bit(bit);
- }
- for (int _ = 0; _ < codes.size() - code.length(); ++_) {
- writer.write_bit(0);
- }
- }
- }
- void encode_len() {
- unsigned char n = codes.size();
- bool same_bit = n % 2;
- writer.write_char(n);
- // считаем сумму длин кодов в битах
- uint64_t sum = 0;
- char ch;
- while (reader->read(&ch)) {
- sum += codes[ch].length();
- }
- reader->reset();
- vector<char> code;
- for (int _ = 0; _ < 8; ++_) {
- unsigned char byte = sum % (1 << 7);
- byte <<= 1;
- byte += same_bit;
- code.push_back(byte);
- sum >>= 7;
- }
- reverse(code.begin(), code.end());
- for (char byte : code) {
- writer.write_char(byte);
- }
- }
- void encode() {
- encode_len();
- encode_table();
- char ch;
- while (reader->read(&ch)) {
- for (char bit : codes[ch]) {
- writer.write_bit(bit);
- }
- }
- }
- };
- // count frequency of each character
- unordered_map<char, uint64_t> cnt_freqs(Reader* reader) {
- unordered_map<char, uint64_t> freqs;
- char ch;
- while (reader->read(&ch)) {
- ++freqs[ch];
- }
- return freqs;
- }
- int compress(std::istream* in, std::ostream* out) {
- if (in == nullptr || out == nullptr) {
- return Error::WRONG_ARGUMENTS;
- }
- Reader reader(in);
- Bohr bohr(cnt_freqs(&reader));
- reader.reset();
- try {
- Encoder encoder(&reader, out, bohr.get_codes());
- encoder.encode();
- reader.reset();
- }
- catch (...) {
- return Error::OUTPUT_ERROR;
- }
- return Error::OK;
- }
- // decodes
- class Decoder {
- private:
- Reader reader;
- bool same_bit;
- public:
- Decoder(istream* in, ostream* out) : reader(in) {}
- char get_n() {
- char n = 0;
- if (!reader.read(&n)) {
- throw std::invalid_argument("bad file");
- }
- same_bit = n % 2;
- return n >> 1;
- }
- uint64_t get_len() {
- uint64_t len = 0;
- for (int _ = 0; _ < 7; ++_) {
- char byte;
- if (!reader.read(&byte)) {
- throw std::invalid_argument("bad file");
- }
- if ((byte % 2) != same_bit) {
- throw std::invalid_argument("bad file");
- }
- len <<= 7;
- len += (byte >> 1);
- }
- return len;
- }
- };
- int decompress(std::istream* in, std::ostream* out) {
- if (in == nullptr || out == nullptr) {
- return Error::WRONG_ARGUMENTS;
- }
- return Error::OK;
- }
- #include <sstream>
- #include <fstream>
- ofstream out("output.txt");
- int main() {
- stringstream ss;
- ss << "abracadabra";
- stringstream ssout;
- int x = compress(&ss, &out);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement