Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <inttypes.h>
- #include <vector>
- class NODE
- {
- private:
- uint16_t weight;
- uint8_t value;
- NODE* left;
- NODE* right;
- NODE* parent;
- public:
- NODE()
- {
- set(NULL, NULL, NULL, 0, 0);
- }
- ~NODE()
- {
- if (left != NULL)
- {
- delete left;
- left = NULL;
- }
- if (right != NULL)
- {
- delete right;
- right = NULL;
- }
- }
- NODE* getRight() { return right; }
- NODE* getLeft() { return left; }
- NODE* getParent() { return parent; }
- uint16_t getWeight() { return weight; }
- uint8_t getValue() { return value; }
- void set(NODE* _parent, NODE* _left, NODE* _right, uint16_t _weight, uint8_t _value)
- {
- parent = _parent;
- left = _left;
- right = _right;
- weight = _weight;
- value = _value;
- }
- void setParent(NODE* a)
- { parent = a; }
- void print()
- {
- printf("%p %p<->%p %hhu [%hu]\n", parent, left, right, value, weight);
- }
- };
- void createHuffmanTable(std::vector< std::vector<bool> >& TABLE, uint16_t* FREQ, std::vector<uint8_t>& FLATTENED_TREE)
- {
- //make the table of nodes
- std::vector<NODE*> all_nodes;
- for (uint16_t i=0; i<256; i++)
- {
- if (FREQ[i] != 0)
- {
- NODE* temp = new NODE;
- temp->set(NULL,NULL,NULL,FREQ[i],i);
- all_nodes.push_back(temp);
- }
- }
- //create the Huffman tree
- if (all_nodes.size() > 1)
- {
- //find the 2 lowest nodes
- NODE* first;
- NODE* second;
- uint8_t first_index;
- uint8_t second_index;
- while (all_nodes.size() > 1)
- {
- first_index = 0;
- second_index = 0;
- //grab the first lowest node
- first = all_nodes[0];
- for (int i=0; i<all_nodes.size(); i++)
- {
- if (all_nodes[i]->getWeight() < first->getWeight())
- {
- first = all_nodes[i];
- first_index = i;
- }
- }
- //remove the first lowest node
- all_nodes[first_index] = all_nodes[all_nodes.size()-1];
- all_nodes.pop_back();
- //grab the second lowest node
- second = all_nodes[0];
- for (int i=0; i<all_nodes.size(); i++)
- {
- if (all_nodes[i]->getWeight() < second->getWeight())
- {
- second = all_nodes[i];
- second_index = i;
- }
- }
- //remove the second lowest node
- all_nodes[second_index] = all_nodes[all_nodes.size()-1];
- all_nodes.pop_back();
- //join the two nodes
- NODE* temp = new NODE;
- temp->set(NULL, first, second, first->getWeight() + second->getWeight(), 0);
- first->setParent(temp);
- second->setParent(temp);
- //push the new node into the vector
- all_nodes.push_back(temp);
- }
- }
- //all_nodes[0] is the head of the tree
- //extract the leaves and create a flattened tree
- std::vector<NODE*> nodes;
- nodes.push_back(all_nodes[0]);
- all_nodes.clear();
- for (uint16_t i=0; i<nodes.size(); i++)
- {
- if (nodes[i]->getLeft() == NULL && nodes[i]->getRight() == NULL)
- {
- all_nodes.push_back(nodes[i]);
- }
- if (nodes[i]->getLeft())
- {
- nodes.push_back(nodes[i]->getLeft());
- //if leaf
- if (nodes[i]->getLeft()->getLeft() == NULL && nodes[i]->getLeft()->getRight() == NULL)
- {
- FLATTENED_TREE.push_back('1');
- FLATTENED_TREE.push_back(nodes[i]->getLeft()->getValue());
- }
- else
- FLATTENED_TREE.push_back('0');
- }
- if (nodes[i]->getRight())
- {
- nodes.push_back(nodes[i]->getRight());
- //if leaf
- if (nodes[i]->getRight()->getLeft() == NULL && nodes[i]->getRight()->getRight() == NULL)
- {
- FLATTENED_TREE.push_back('1');
- FLATTENED_TREE.push_back(nodes[i]->getRight()->getValue());
- }
- else
- FLATTENED_TREE.push_back('0');
- }
- }
- //create the Huffman Table of all characters
- if (all_nodes.size() == 1)
- {
- TABLE[ all_nodes[0]->getValue() ].push_back(1);
- //printf(" - %c[%d]\n", all_nodes[0]->getValue(),all_nodes[0]->getValue());
- }
- else
- {
- //get the bit representation of each character
- for (int i=0; i<all_nodes.size(); i++)
- {
- NODE* temp = all_nodes[i];
- while (temp->getParent() != NULL)
- {
- if (temp->getParent()->getLeft() == temp)
- {
- TABLE[ all_nodes[i]->getValue() ].push_back(0);
- }
- else if (temp->getParent()->getRight() == temp)
- {
- TABLE[ all_nodes[i]->getValue() ].push_back(1);
- }
- temp = temp->getParent();
- }
- }
- }
- // TABLE contains the huffman table with each vector representing the bit sequence
- }
- int main()
- {
- system("clear");
- // STAGE 1: read input and create a frequency table
- //open the input file for reading
- FILE* fInput = fopen("input.txt", "r");
- if (!fInput)
- {
- printf("Input file could not be read!\n");
- fclose(fInput);
- return 0;
- }
- //create a frequency table
- uint16_t frequency_table[256] = {0};
- //read the input source
- char line[1024];
- uint64_t original_byte_count = 0;
- while (fgets(line, 1024, fInput) != NULL)
- {
- for (uint16_t i=0; i<strlen(line); i++)
- {
- //LOSSLESS
- //count the frequency of each character
- frequency_table[ line[i] ]++;
- //LOSSY
- /*
- if (line[i] >= 97 && line[i] <= 122) //lower case letters
- {
- frequency_table[ line[i]-32 ]++;
- }
- else
- {
- frequency_table[ line[i] ]++;
- }
- */
- original_byte_count++;
- }
- }
- if (original_byte_count == 0)
- {
- printf("Nothing to encode\n");
- return 0;
- }
- // STAGE 2: create the Huffman Table of all characters
- std::vector< std::vector<bool> > bitRep(256);
- std::vector<uint8_t> flattened_tree;
- createHuffmanTable(bitRep, frequency_table, flattened_tree);
- // STAGE 3: print out the huffman table of the used characters
- for (int i=0; i<bitRep.size(); i++)
- {
- if (bitRep[i].size() > 0)
- {
- for (int j=bitRep[i].size()-1; j>=0; j=j-1)
- {
- if (bitRep[i][j] == true)
- printf("1");
- else if (bitRep[i][j] == false)
- printf("0");
- }
- printf(" — [%d] %c\n", i, i);
- }
- }
- uint64_t bit_sum = 0;
- for (int i=0; i<256; i++)
- {
- if (frequency_table[i] > 0)
- {
- bit_sum = bit_sum + (bitRep[i].size() * frequency_table[i]);
- }
- }
- printf("\n\n");
- printf("Starting number of bits %llu\n", original_byte_count*8);
- printf("number of bits %llu\n", bit_sum);
- printf("number of bytes %f\n", float(bit_sum)/8.0);
- printf("compression ratio %f\n", float(bit_sum)/float(original_byte_count*8));
- // STAGE 3: store Huffman Table in an array
- uint16_t number_of_active_bytes = 0;
- uint8_t active_byte[32] = {0};
- uint32_t number_of_stored_table_bits = 0;
- for (int i=0; i<256; i++)
- {
- //set the active bytes
- if (!bitRep[i].empty())
- {
- number_of_active_bytes++;
- active_byte[i/8] = active_byte[i/8] | (0x1 << i%8);
- number_of_stored_table_bits = number_of_stored_table_bits + bitRep[i].size();
- }
- }
- printf("\n\n");
- printf("Number of active bytes %hu\n", number_of_active_bytes);
- printf("Number of huffman table bits %u\n", number_of_active_bytes*8 + number_of_stored_table_bits);
- printf("Number of total bits %llu\n", 37*8 + number_of_active_bytes*8 + number_of_stored_table_bits + bit_sum);
- for (int i=0; i<flattened_tree.size(); i++)
- printf("%c", flattened_tree[i]);
- printf("\n");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement