Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <fstream>
- #include <iterator>
- #include <vector>
- #include <algorithm>
- #include <unordered_map>
- #include <iostream>
- #include <stdexcept>
- #include <bitset>
- struct Node {
- public:
- Node(char symbol, int frequency, char part_of_code)
- : symbol_(symbol),
- frequency_(frequency),
- code_(part_of_code),
- parent_(NULL),
- left_(NULL),
- right_(NULL) {
- }
- Node(Node *a, Node *b)
- : symbol_('0'),
- frequency_(a->get_frequency() + b->get_frequency()),
- left_(a),
- right_(b),
- code_('0'),
- parent_(NULL) {
- a->set_parent(this);
- b->set_parent(this);
- a->set_code('0');
- b->set_code('1');
- }
- ~Node() {
- delete left_;
- delete right_;
- }
- void set_code(char code) {
- code_ = code;
- }
- void set_parent(Node *parent) {
- parent_ = parent;
- }
- Node *get_parent() {
- return parent_;
- }
- Node *get_left() {
- return left_;
- }
- Node *get_right() {
- return right_;
- }
- char get_symbol() {
- return symbol_;
- }
- int get_frequency() {
- return frequency_;
- }
- char get_code() {
- return code_;
- }
- private:
- char symbol_;
- int frequency_;
- Node *parent_;
- Node *left_;
- Node *right_;
- char code_;
- };
- struct HuffmanTree {
- public:
- HuffmanTree(std::vector <std::pair<char, int> > *pairs) {
- root = NULL;
- leafs = new std::vector <Node*>();
- if (pairs->size() == 0) {return;}
- else if (pairs->size() == 1) {
- Node *node = new Node((*pairs)[0].first,(*pairs)[0].second, '0');
- leafs->push_back(node);
- root = node;
- }
- else {
- int root_frequency = std::numeric_limits<int>::max();
- int pointer = 0;
- while (pointer + 1 < pairs->size()) {
- int first_frequency = (*pairs)[pointer].second;
- int second_frequency = (*pairs)[pointer + 1].second;
- if (second_frequency < root_frequency) {
- connect_two_nodes((*pairs)[pointer], (*pairs)[pointer + 1]);
- pointer += 2;
- } else {
- if (first_frequency < root_frequency) {connect_one_node((*pairs)[pointer], true);}
- else {connect_one_node((*pairs)[pointer], false);}
- pointer++;
- }
- root_frequency = root->get_frequency();
- }
- if (pointer + 1 == pairs->size()) {
- if ((*pairs)[pointer].second < root_frequency) {connect_one_node((*pairs)[pointer], true);}
- else {connect_one_node((*pairs)[pointer], false);}
- }
- }
- }
- ~HuffmanTree() {
- delete root;
- delete leafs;
- }
- std::unordered_map<char, std::string> *get_code_table() {
- std::unordered_map<char, std::string> *code_table = new std::unordered_map<char, std::string>();
- for (size_t i = 0; i != (unsigned int) leafs->size(); ++i) {
- (*code_table)[(*leafs)[i]->get_symbol()] = get_code(*(*leafs)[i]);
- }
- return code_table;
- }
- char get_symbol(char* buffer, size_t &buffer_pointer){
- size_t old_pointer = buffer_pointer;
- Node *tree_pointer = root;
- while(tree_pointer->get_left() != NULL || tree_pointer->get_right() != NULL){
- if(buffer[buffer_pointer] == '0'){
- tree_pointer = tree_pointer->get_left();
- }else if(buffer[buffer_pointer] == '1'){
- tree_pointer = tree_pointer->get_right();
- }
- buffer_pointer++;
- }
- for(int i = old_pointer; i < buffer_pointer; ++i){
- std::cout << buffer[i];
- }
- std::cout << std::endl;
- std::cout << "returning symbol is : " << tree_pointer->get_symbol()<< std::endl;
- return tree_pointer->get_symbol();
- }
- private:
- Node *root;
- std::vector<Node*> *leafs;
- std::string get_code(Node &node) {
- Node *pointer = &node;
- std::string code;
- while (pointer != root) {
- code += pointer->get_code();
- pointer = pointer->get_parent();
- }
- std::reverse(code.rbegin(), code.rend());
- return code;
- }
- void connect_two_nodes(std::pair<char, int> pair1, std::pair<char, int> pair2) {
- Node *left_node = new Node(pair1.first, pair1.second, '1');
- Node *right_node = new Node(pair2.first, pair2.second, '0');
- Node *left_and_right_merged = new Node(left_node, right_node);
- leafs->push_back(left_node);
- leafs->push_back(right_node);
- if (root == NULL) {
- root = left_and_right_merged;
- } else {
- Node *old_root = root;
- old_root->set_code('1');
- left_and_right_merged->set_code('0');
- Node *new_root = new Node(old_root, left_and_right_merged);
- root = new_root;
- }
- }
- void connect_one_node(std::pair<char, int> pair, bool connect_as_right) {
- if (connect_as_right) {
- Node *leaf = new Node(pair.first, pair.second, '0');
- root->set_code('1');
- leafs->push_back(leaf);
- Node *new_root = new Node(leaf, root);
- root = new_root;
- } else {
- Node *leaf = new Node(pair.first, pair.second, '1');
- leafs->push_back(leaf);
- root->set_code('0');
- Node *new_root = new Node(root, leaf);
- root = new_root;
- }
- }
- };
- struct frequency_pair_comparator {
- inline bool operator()(const std::pair<char, int> &a, const std::pair<char, int> &b) {
- return (a.second < b.second);
- }
- };
- std::vector<char> *get_symbols(char* input_name) {
- std::ifstream input (input_name, std::ifstream::binary);
- if(!input){
- return new std::vector<char>();
- }
- input.seekg (0, input.end);
- size_t length = input.tellg();
- input.seekg (0, input.beg);
- std::cout << length << std::endl;
- char *buffer = new char [length];
- input.read(buffer, length);
- std::vector<char> *symbols = new std::vector<char>();
- for(size_t i = 0; i < length; i++){
- symbols->push_back(buffer[i]);
- }
- input.close();
- delete buffer;
- return symbols;
- }
- std::vector<std::pair<char, int> > *get_frequencies(std::vector<char> *symbols) {
- std::unordered_map<char, int> frequencies;
- for (size_t i = 0; i != symbols->size(); ++i) {
- if (frequencies[(*symbols)[i]] == 0) {
- frequencies[(*symbols)[i]] = 1;
- } else {
- frequencies[(*symbols)[i]] += 1;
- }
- }
- std::vector<std::pair<char, int> > *sortable_frequencies = new std::vector<std::pair<char, int> >();
- for(std::unordered_map<char, int>::iterator it = frequencies.begin(); it != frequencies.end(); ++it){
- sortable_frequencies->push_back( std::pair<char, int>(it->first, it->second));
- }
- std::sort(sortable_frequencies->begin(), sortable_frequencies->end(), frequency_pair_comparator());
- return sortable_frequencies;
- }
- void write_coded_data(char* output_name, std::vector<std::pair<char, int> > *frequences, std::unordered_map<char, std::string> *code_table, std::vector<char> *symbols){
- long long tree_size = 0;
- std::ofstream outfile (output_name, std::ofstream::binary);
- outfile << " " <<frequences->size() << std::endl;
- tree_size += sizeof(frequences->size());
- for(std::vector<std::pair<char, int> >::iterator it = frequences->begin(); it != frequences->end(); ++it){
- outfile << it->first << " " << it->second << std::endl;
- tree_size += (sizeof(it->second) + sizeof(it->first) + sizeof(" ") + 1);
- }
- std::cout << tree_size << std::endl;
- const size_t buffer_size = 8;
- char buffer[buffer_size + 1];
- size_t buffer_pointer = 0;
- size_t code_pointer = 0;
- size_t code_size = 0;
- std::string code;
- size_t symbols_pointer = 0;
- long long new_symbols_counter = 0;
- while(symbols_pointer != (unsigned int) symbols->size() || (symbols_pointer == (unsigned int) symbols->size() && code_size - code_pointer >= 8)){
- if(code_size == 0){
- code = (*code_table)[(*symbols)[symbols_pointer]];
- code_size = code.length();
- symbols_pointer++;
- }
- while(buffer_pointer < buffer_size && code_pointer < code_size){
- buffer[buffer_pointer] = code.at(code_pointer);
- ++code_pointer;
- ++buffer_pointer;
- }
- if(buffer_pointer == buffer_size){
- buffer_pointer = 0;
- unsigned long i = 0;
- try {
- std::bitset<buffer_size> bit_buffer ((std::string(buffer)));
- i = bit_buffer.to_ulong();
- }
- catch (const std::invalid_argument& ia) {}
- char char_buffer = static_cast<unsigned char>( i );
- outfile.write(&char_buffer, sizeof(char_buffer));
- new_symbols_counter += sizeof(char);
- }
- if(code_pointer == code_size){
- code_size = 0;
- code_pointer = 0;
- }
- }
- if(buffer_pointer != buffer_size){
- unsigned long i = 0;
- try {
- std::bitset<buffer_size> bit_buffer ((std::string(buffer)));
- i = bit_buffer.to_ulong();
- }
- catch (const std::invalid_argument& ia) {}
- char char_buffer = static_cast<unsigned char>( i );
- outfile.write(&char_buffer, sizeof(char_buffer));
- outfile.seekp(std::ios_base::beg);
- outfile << buffer_pointer << std::endl;
- }else{
- outfile.seekp(std::ios_base::beg);
- outfile << 0 << std::endl;
- }
- std::cout << new_symbols_counter << std::endl;
- outfile.close();
- }
- void compress_file(char* input_name, char* output_name) {
- std::vector<char> *symbols = get_symbols(input_name);
- std::vector<std::pair<char, int> > *frequencies_table = get_frequencies(symbols);
- HuffmanTree tree(frequencies_table);
- std::unordered_map<char, std::string> *code_table = tree.get_code_table();
- write_coded_data(output_name, frequencies_table, code_table, symbols);
- for(std::unordered_map<char, std::string>::iterator it = code_table->begin(); it != code_table->end(); ++it){
- std::cout << it->first << " " << it->second << std::endl;
- }
- delete symbols;
- delete frequencies_table;
- delete code_table;
- }
- std::vector<std::pair<char, int> > *get_frequences_from_file(std::ifstream &input){
- int number_of_symbols = 0;
- input >> number_of_symbols;
- std:: vector<std::pair<char, int> > *frequences = new std::vector<std::pair<char, int> >();
- for(int i = 0; i != number_of_symbols; ++i){
- char symbol = 0;
- input >> symbol;
- int frequency = 0;
- input >> frequency;
- frequences->push_back(std::pair<char, int>(symbol, frequency));
- }
- return frequences;
- }
- std::vector<unsigned char> *get_data(std::ifstream &input){
- std::vector<unsigned char> *symbols = new std::vector<unsigned char>();
- for(std::istreambuf_iterator<char> it(input), e; it != e; ++it){
- symbols->push_back(*it);
- }
- symbols->erase(symbols->begin(), symbols->begin() + 1);
- std::cout << symbols->size() << std::endl;
- return symbols;
- }
- void make_source_file(char* output_name ,std::vector<std::pair<char, int> > *frequences, char* bits_buffer, size_t bits_buffer_size){
- HuffmanTree tree(frequences);
- size_t pointer = 0;
- std::ofstream outfile (output_name, std::ofstream::binary);
- while(pointer < bits_buffer_size){
- char symbol = tree.get_symbol(bits_buffer, pointer);
- outfile.write(&symbol, sizeof(symbol));
- }
- outfile.close();
- }
- void get_source_file(char* input_name, char* output_name){
- std::ifstream input (input_name, std::ifstream::binary);
- if(!input){
- return;
- }
- int last_symbol_significant_bits = 0;
- input >> last_symbol_significant_bits;
- std::vector<std::pair<char, int> > *frequences = get_frequences_from_file(input);
- std::vector<unsigned char> *symbols = get_data(input);
- input.close();
- std::cout << symbols->size() << std::endl;
- const size_t bits_buffer_size = 8 * (symbols->size() - 1) + last_symbol_significant_bits;
- char *bits_buffer = new char[bits_buffer_size];
- size_t buffer_pointer = 0;
- for(size_t i = 0; i < symbols->size() - 1; ++i){
- std::string bit_string = std::bitset<8>((*symbols)[i]).to_string();
- for(size_t k = 0; k < 8; k++){
- bits_buffer[buffer_pointer] = bit_string.at(k);
- buffer_pointer++;
- }
- }
- std::string bit_string = std::bitset<8>((*symbols)[symbols->size() - 1]).to_string();
- for(size_t i = 0; i < last_symbol_significant_bits; ++i){
- bits_buffer[buffer_pointer] = bit_string.at(i);
- buffer_pointer++;
- }
- //PREVIOUS CODE WORKS PERFECT
- /**
- остались ошибки с разбором слова
- нужно фиксить сам алгоритм кодирования и алгоритм разбора
- **/
- make_source_file(output_name, frequences, bits_buffer, bits_buffer_size);
- delete frequences;
- delete symbols;
- }
- void decompress_file(char* input_name, char* output_name) {
- get_source_file(input_name, output_name);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement