Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- * solution.cpp
- *
- * Example solution.
- * This is (almost) how blocks are actually compressed in TON.
- * Normally, blocks are stored using vm::std_boc_serialize with mode=31.
- * Compression algorithm takes a block, converts it to mode=2 (which has less extra information) and compresses it using lz4.
- */
- #include <iostream>
- #include "td/utils/lz4.h"
- #include "td/utils/base64.h"
- #include "vm/boc.h"
- #include "td/utils/buffer.h" // td::BufferSlice, td::Slice, td::MutableSlice
- #include "td/utils/misc.h" // td::buffer_to_hex
- #include "td/utils/Slice.h"
- #include "vm/cellslice.h"
- #include "td/utils/Gzip.h"
- #include "td/utils/ByteFlow.h"
- //#include "zlib.h"
- #include <limits>
- using namespace std;
- int counter = 0, counter2 = 0, counter3 = 0, counter4 = 0;
- int coun = 0;
- map<int, td::Ref<vm::Cell>> used_ref;
- map<string, int> num;
- map<string, bool> used_st;
- map<int, bool> used_int;
- map<int, string> datas, refs, cells_info;
- int coun1 = 0, coun2 = 0;
- int num_sp = 0;
- int la = 0, co = 0;
- void dfs_encode(td::Ref<vm::Cell> v) {
- if (used_st[v->get_hash().to_hex()]) {
- return;
- }
- used_st[v->get_hash().to_hex()] = 1;
- bool b = true;
- vm::CellSlice cellslice_v = vm::load_cell_slice_special(v, b);
- vm::CellBuilder cell_build;
- int num_data = cellslice_v.size();
- string s = "";
- for (int i = 0; i < num_data / 8; i++) {
- char ch = cellslice_v.fetch_long(8);
- s += ch;
- }
- if (cellslice_v.size() != 0) {
- int pl = pow(2, cellslice_v.size());
- int val = 0;
- for (int i = 0; i < num_data % 8; i++) {
- int val2 = abs(cellslice_v.fetch_long(1));
- val *= 2;
- val += val2;
- }
- val += pl;
- u_char ch = val;
- s += ch;
- }
- int cur_counter = counter;
- counter++;
- datas[cur_counter] = s;
- int num_cit = cellslice_v.size_refs();
- s = "";
- if (cellslice_v.is_special()) {
- num_sp++;
- }
- u_char ch = num_cit + 8 * cellslice_v.is_special();
- s += ch;
- ch = (num_data / 8) * 2;
- if (num_data % 8 != 0) {
- ch++;
- }
- s += ch;
- cells_info[cur_counter] = s;
- s = "";
- for (int i = 0; i < num_cit; i++) {
- td::Ref<vm::Cell> son = cellslice_v.fetch_ref();
- if (used_st[son->get_hash().to_hex()]) {
- //cout << cur_counter << " " << i << endl;
- int val = num[son->get_hash().to_hex()];
- if (val - la >= 0 && val - la < 16) {
- val = val - la;
- la = num[son->get_hash().to_hex()];
- u_char ch = val + 128;
- s += ch;
- }
- else {
- co++;
- u_char ch = val / 256;
- s += ch;
- ch = val % 256;
- s += ch;
- }
- ch = cells_info[cur_counter][0];
- ch += (16 << i);
- cells_info[cur_counter][0] = ch;
- }
- else {
- dfs_encode(son);
- }
- }
- refs[cur_counter] = s;
- num[v->get_hash().to_hex()] = cur_counter;
- return;
- }
- td::Ref<vm::Cell> dfs_decode(int v) {
- if (used_int[v]) {
- return used_ref[v];
- }
- used_int[v] = 1;
- vm::CellBuilder cell_build;
- string s;
- //cout << v << " " << (int)(u_char)cells_info[v][0] << " " << (int)(u_char)cells_info[v][1] << endl;
- for (int i = 0; i < ((int)(u_char)(cells_info[v][1]) + 1) / 2; i++) {
- if (i == ((int)(u_char)(cells_info[v][1]) + 1) / 2 - 1 && (int)(u_char)cells_info[v][1] % 2 == 1) {
- int t = (int)(u_char)datas[v][i];
- vector<bool> vec;
- while (t > 1) {
- vec.push_back(t % 2);
- t /= 2;
- }
- reverse(vec.begin(), vec.end());
- for (auto j : vec) {
- if (j == 0) {
- cell_build.store_zeroes(1);
- }
- else {
- cell_build.store_ones(1);
- }
- }
- continue;
- }
- s = "";
- s += datas[v][i];
- cell_build.store_bytes(s);
- }
- //cout << v << " " << (int)(u_char)cells_info[v][0] << endl;
- coun++;
- for (int i = 0; i < (int)(u_char)cells_info[v][0] % 8; i++) {
- int son = (int)(u_char)refs[v][i * 2] * 256 + (int)(u_char)refs[v][i * 2 + 1];
- if (son == 255 * 257) {
- son = coun;
- }
- else {
- if (son >= 128 * 256) {
- son = la + son / 256 - 128;
- la = son;
- }
- }
- cell_build.store_ref(dfs_decode(son));
- }
- used_ref[v] = cell_build.finalize(((int)(u_char)cells_info[v][0] % 16) / 8);
- bool b = true;
- return used_ref[v];
- }
- string enc(string s) {
- td::BufferSlice buf(s);
- buf = td::gzencode(buf, 100.0);
- string ans = "";
- for (int i = 0; i < buf.size(); i++) {
- ans += buf[i];
- }
- return ans;
- }
- string dec(string s) {
- td::BufferSlice buf(s);
- buf = td::gzdecode(buf);
- string ans = "";
- for (int i = 0; i < buf.size(); i++) {
- ans += buf[i];
- }
- return ans;
- }
- bool comp(int a, int b) {
- if ((int)(u_char)cells_info[a][1] != (int)(u_char)cells_info[b][1]) {
- return (int)(u_char)cells_info[a][1] > (int)(u_char)cells_info[b][1];
- }
- return a > b;
- }
- string encode(td::Ref<vm::Cell> root) {
- counter = 0;
- dfs_encode(root);
- string code = "";
- vector<bool> is_sp(counter, 0);
- string codes[13];
- u_char ch = counter / 256;
- codes[0] += ch;
- ch = counter % 256;
- codes[0] += ch;
- int coun = 0;
- for (int i = 0; i < counter; i++) {
- if ((int)(u_char)(cells_info[i][0] % 16) / 8 == 1) {
- is_sp[i] = 1;
- }
- codes[0] += cells_info[i];
- //cout << i << " " << (int)(u_char)cells_info[i][0] << " " << (int)(u_char)cells_info[i][1] << " " << datas[i].size() << endl;
- }
- vector<int> vec;
- vec.clear();
- for (int i = 0; i < counter; i++) {
- if (!is_sp[i]) {
- vec.push_back(i);
- }
- }
- int non_sp_pos = 0;
- sort(vec.begin(), vec.end(), comp);
- for (int i = 0; i < counter; i++) {
- int save_i = i;
- if (!is_sp[i]) {
- i = vec[non_sp_pos];
- non_sp_pos++;
- }
- if (datas[i].size() >= 1 && is_sp[i]) {
- codes[1] += datas[i][0];
- }
- else if (datas[i].size() >= 1) {
- codes[6] += datas[i][0];
- }
- if (datas[i].size() >= 2 && is_sp[i]) {
- codes[2] += datas[i][1];
- }
- else if (datas[i].size() >= 2) {
- codes[7] += datas[i][1];
- }
- if (is_sp[i]) {
- for (int j = 2; j < datas[i].size() - 2; j++) {
- codes[3] += datas[i][j];
- }
- }
- else {
- for (int j = 2; j < datas[i].size(); j++) {
- codes[8] += datas[i][j];
- }
- }
- if (is_sp[i]) {
- codes[4] += (int)(u_char)datas[i][datas[i].size() - 2];
- codes[5] += (int)(u_char)datas[i][datas[i].size() - 1];
- }
- i = save_i;
- }
- //cout << (double)(enc(code2).size() + enc(code3).size()) / (double)(enc(code2 + code3).size()) << endl;
- //cout << (double)(enc(code3).size() + enc(code4).size()) / (double)(enc(code3 + code4).size()) << endl;
- for (int i = 0; i < counter; i++) {
- for (int j = 0; j < refs[i].size(); j++) {
- if ((int)(u_char)refs[i][j] >= 128) {
- codes[9] += refs[i][j];
- }
- else {
- codes[9] += refs[i][j];
- codes[10] += refs[i][j + 1];
- j++;
- }
- }
- }
- code = "";
- code += (u_char)11;
- for (int i = 0; i < 11; i++) {
- code += (u_char)(enc(codes[i]).size() / 256 / 256);
- code += (u_char)((enc(codes[i]).size() % (256 * 256)) / 256);
- code += (u_char)(enc(codes[i]).size() % 256);
- }
- for (int i = 0; i < 11; i++) {
- code += enc(codes[i]);
- }
- return code;
- }
- td::Ref<vm::Cell> decode(string code) {
- string code2 = "";
- int num = (u_char)code[0];
- int pos = 1 + num * 3;
- int pos2 = 0;
- for (int i = 0; i < num; i++) {
- string cur_decode = "";
- int cur_len = (u_char)code[i * 3 + 1] * 256 * 256 + (u_char)code[i * 3 + 2] * 256 + (u_char)code[i * 3 + 3];
- for (int j = 0; j < cur_len; j++) {
- cur_decode += code[pos];
- pos++;
- }
- if (i == num - 1) {
- pos2 = code2.size();
- }
- code2 += dec(cur_decode);
- }
- code = code2;
- cells_info.clear();
- datas.clear();
- refs.clear();
- int n = (int)(u_char)code[0] * 256 + (int)(u_char)code[1];
- vector<bool> is_sp(n, 0);
- pos = 2;
- int sp_num = 0;
- int num_sp = 0;
- for (int i = 0; i < n; i++) {
- u_char ch = code[pos];
- if ((ch % 16) / 8 == 1) {
- is_sp[i] = 1;
- num_sp++;
- }
- cells_info[i] += ch;
- ch = code[pos + 1];
- cells_info[i] += ch;
- pos += 2;
- }
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- continue;
- }
- if (((int)(u_char)(cells_info[i][1]) + 1) / 2 > 0) {
- datas[i] += code[pos];
- pos++;
- }
- }
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- continue;
- }
- if (((int)(u_char)(cells_info[i][1]) + 1) / 2 > 1) {
- datas[i] += code[pos];
- pos++;
- }
- }
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- continue;
- }
- for (int j = 2; j < ((int)(u_char)(cells_info[i][1]) + 1) / 2 - 2; j++) {
- datas[i] += code[pos];
- pos++;
- }
- }
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- continue;
- }
- datas[i] += code[pos];
- pos++;
- }
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- continue;
- }
- datas[i] += code[pos];
- pos++;
- }
- vector<int> vec;
- vec.clear();
- for (int i = 0; i < n; i++) {
- if (!is_sp[i]) {
- vec.push_back(i);
- }
- }
- int non_sp_pos = 0;
- sort(vec.begin(), vec.end(), comp);
- for (int i = 0; i < n; i++) {
- if (is_sp[i]) {
- continue;
- }
- int save_i = i;
- i = vec[non_sp_pos];
- non_sp_pos++;
- if (((int)(u_char)(cells_info[i][1]) + 1) / 2 > 0) {
- datas[i] += code[pos];
- pos++;
- }
- i = save_i;
- }
- non_sp_pos = 0;
- for (int i = 0; i < n; i++) {
- if (is_sp[i]) {
- continue;
- }
- int save_i = i;
- i = vec[non_sp_pos];
- non_sp_pos++;
- if (((int)(u_char)(cells_info[i][1]) + 1) / 2 > 1) {
- datas[i] += code[pos];
- pos++;
- }
- i = save_i;
- }
- non_sp_pos = 0;
- for (int i = 0; i < n; i++) {
- if (is_sp[i]) {
- continue;
- }
- int save_i = i;
- i = vec[non_sp_pos];
- non_sp_pos++;
- for (int j = 2; j < ((int)(u_char)(cells_info[i][1]) + 1) / 2; j++) {
- datas[i] += code[pos];
- pos++;
- }
- i = save_i;
- }
- //cout << pos << endl;
- int cur_coun = 0;
- for (int i = 0; i < n; i++) {
- for (int j = 0; j < (int)(u_char)(cells_info[i][0]) % 8; j++) {
- if ((int)(u_char)cells_info[i][0] % (16 << (j + 1)) < (16 << j)) {
- u_char ch = 255;
- refs[i] += ch;
- refs[i] += ch;
- }
- else {
- if ((int)(u_char)code[pos] >= 128) {
- refs[i] += code[pos];
- u_char ch = 0;
- refs[i] += ch;
- pos++;
- }
- else {
- refs[i] += code[pos];
- pos++;
- refs[i] += code[pos2];
- pos2++;
- }
- }
- }
- }
- //cout << cur_coun << endl;
- la = 0;
- return dfs_decode(0);
- }
- td::BufferSlice compress(td::Slice data) {
- td::Ref<vm::Cell> root = vm::std_boc_deserialize(data).move_as_ok();
- td::BufferSlice serialized(encode(root));
- return serialized;
- }
- td::BufferSlice decompress(td::Slice data) {
- td::BufferSlice serialized(data);
- string code = "";
- for (int i = 0; i < serialized.size(); i++) {
- code += serialized[i];
- }
- td::Ref<vm::Cell> root = decode(code);
- return vm::std_boc_serialize(root, 31).move_as_ok();
- }
- int main() {
- std::string mode;
- std::cin >> mode;
- //CHECK(mode == "compress" || mode == "decompress");
- std::string base64_data;
- std::cin >> base64_data;
- //CHECK(!base64_data.empty());
- //td::BufferSlice data2(td::base64_decode(base64_data).move_as_ok());
- //std::cout << "Decoded = " << td::buffer_to_hex(data2) << "\n\n";
- td::BufferSlice data(td::base64_decode(base64_data).move_as_ok());
- /*if (decompress(compress(data)) == data) {
- cout << 57 << endl;
- }*/
- if (mode == "compress") {
- data = compress(data);
- }
- else {
- data = decompress(data);
- }
- //cout << data.size() << endl;
- std::cout << td::base64_encode(data) << std::endl;
- }
Add Comment
Please, Sign In to add comment