Advertisement
Guest User

HuffmanZipperByTsion

a guest
Dec 17th, 2017
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.11 KB | None | 0 0
  1. #include <assert.h>
  2. #include <functional>
  3. #include <iostream>
  4. #include <map>
  5. #include <queue>
  6. #include <vector>
  7. #include <stack>
  8.  
  9. #include "Huffman.h"
  10.  
  11. using namespace std;
  12. typedef char byte;
  13.  
  14. class HNode
  15. {
  16. private:
  17.   bool leaf;
  18.   byte letter;
  19.   int frequency;
  20.   HNode *left;
  21.   HNode *right;
  22. public:
  23.   HNode(byte letter, int frequency, bool leaf, HNode *left, HNode *right)
  24.   {
  25.     this->letter = letter;
  26.     this->frequency = frequency;
  27.     this->leaf = leaf;
  28.     this->left = left;
  29.     this->right = right;
  30.   }
  31.  
  32.   HNode(const HNode *&hnode)
  33.   {
  34.     this->letter = hnode->letter;
  35.     this->frequency = hnode->frequency;
  36.     this->leaf = hnode->leaf;
  37.     this->left = hnode->left;
  38.     this->right = hnode->right;
  39.   }
  40.  
  41.   HNode &operator=(const HNode *&hnode)
  42.   {
  43.     this->letter = hnode->letter;
  44.     this->frequency = hnode->frequency;
  45.     this->leaf = hnode->leaf;
  46.     this->left = hnode->left;
  47.     this->right = hnode->right;
  48.     return *this;
  49.   }
  50.  
  51.   bool IsLeaf()
  52.   {
  53.     return this->leaf;
  54.   }
  55.  
  56.   byte &GetLetter()
  57.   {
  58.     return this->letter;
  59.   }
  60.  
  61.   int GetFrequency()
  62.   {
  63.     return this->frequency;
  64.   }
  65.  
  66.   HNode *&GetLeft()
  67.   {
  68.     return this->left;
  69.   }
  70.  
  71.   HNode *&GetRight()
  72.   {
  73.     return this->right;
  74.   }
  75. };
  76.  
  77.  
  78. struct HNodeComparator
  79. {
  80.   bool operator()(HNode *arg0, HNode *arg1)
  81.   {
  82.     return arg0->GetFrequency() > arg1->GetFrequency();
  83.   }
  84. };
  85.  
  86.  
  87. class HuffmanTree
  88. {
  89. public:
  90.   vector<byte> dataforzip;
  91.   vector<bool> packedtree;
  92. private:
  93.   HNode *root;
  94.   map<byte, int> freqcounter;
  95.   priority_queue<HNode *, vector<HNode *>, HNodeComparator> heap;
  96. public:
  97.   HuffmanTree(const vector<byte> &inputstreamdata)
  98.   {
  99.     dataforzip = vector<byte>(inputstreamdata.begin(), inputstreamdata.end());
  100.     this->calculateFrequency_();
  101.     this->buildTree_();
  102.     root = heap.top();
  103.   }
  104.  
  105.   ~HuffmanTree()
  106.   {
  107.     if (!dataforzip.empty())
  108.       dataforzip.clear();
  109.     if (!freqcounter.empty())
  110.       freqcounter.clear();
  111.     while (!heap.empty())
  112.       heap.pop();
  113.     clear_(root);
  114.   }
  115.  
  116.   HNode *&GetRoot()
  117.   {
  118.     return this->root;
  119.   }
  120.  
  121.   void CompleteCodeTable(map<byte, vector<bool> > &encodetable, vector<bool> &path)
  122.   {
  123.     completeCodeTable_(encodetable, path, root);
  124.   }
  125.  
  126.   void GetByte(const vector<bool> &code, byte &letter)
  127.   {
  128.     getByte_(code, letter, this->root);
  129.   }
  130.  
  131.   void GetPackedTree(vector<bool> &buffer)
  132.   {
  133.     if (packedtree.size() == 0)
  134.       packTree_(root);
  135.     getPackedTreeSize_(buffer);
  136.     for (int i = 0; i < packedtree.size(); i++)
  137.       buffer.push_back(packedtree[i]);
  138.   }
  139.  
  140. private:
  141.   void calculateFrequency_()
  142.   {
  143.     for (int i = 0; i < dataforzip.size(); i++)
  144.       freqcounter[dataforzip[i]]++;
  145.   }
  146.  
  147.   void buildTree_()
  148.   {
  149.     HNode *prevnode = nullptr;
  150.     for (map<byte, int>::iterator it = freqcounter.begin(); it != freqcounter.end(); it++)
  151.     {
  152.       prevnode = new HNode(it->first, it->second, true, nullptr, nullptr);
  153.       heap.push(prevnode);
  154.     }
  155.     HNode *currnode = nullptr;
  156.     while (heap.size() > 1)
  157.     {
  158.       prevnode = heap.top();
  159.       heap.pop();
  160.       currnode = new HNode('\0', prevnode->GetFrequency() + heap.top()->GetFrequency(), false, prevnode, heap.top());
  161.       heap.pop();
  162.       heap.push(currnode);
  163.     }
  164.   }
  165.  
  166.   void clear_(HNode *&hnode)
  167.   {
  168.     if (hnode == nullptr)
  169.       return;
  170.     clear_(hnode->GetLeft());
  171.     clear_(hnode->GetRight());
  172.     delete hnode;
  173.   }
  174.  
  175.   void completeCodeTable_(map<byte, vector<bool> > &encodetable, vector<bool> &path, HNode *&hnode)
  176.   {
  177.     if (hnode == nullptr)
  178.       path.pop_back();
  179.     else
  180.     {
  181.       if (hnode->IsLeaf())
  182.       {
  183.         encodetable[hnode->GetLetter()] = vector<bool>(path.begin(), path.end());
  184.         path.pop_back();
  185.       }
  186.       else
  187.       {
  188.         path.push_back(false);
  189.         completeCodeTable_(encodetable, path, hnode->GetLeft());
  190.         path.push_back(true);
  191.         completeCodeTable_(encodetable, path, hnode->GetRight());
  192.         if (path.size() == 0)
  193.           return;
  194.         else
  195.           path.pop_back();
  196.       }
  197.     }
  198.   }
  199.  
  200.   void getByte_(const vector<bool> &code, byte &letter, HNode *&hnode, int bit = 0)
  201.   {
  202.     if (bit == code.size())
  203.     {
  204.       assert(hnode->IsLeaf());
  205.       letter = hnode->GetLetter();
  206.     }
  207.     else
  208.     {
  209.       if (!code[bit])
  210.         getByte_(code, letter, hnode->GetLeft(), bit + 1);
  211.       else
  212.         getByte_(code, letter, hnode->GetRight(), bit + 1);
  213.       return;
  214.     }
  215.   }
  216.  
  217.   void packByte_(byte letter)
  218.   {
  219.     for (int i = 7; i >= 0; i--)
  220.       packedtree.push_back((letter >> i) & 1);
  221.   }
  222.  
  223.   void getPackedTreeSize_(vector<bool> &buffer)
  224.   {
  225.     int packedsize = packedtree.size();
  226.     for (int i = 31; i >= 0; i--)
  227.       buffer.push_back((packedsize >> i) & 1);
  228.   }
  229.  
  230.   void packTree_(HNode *&hnode)
  231.   {
  232.     if (!hnode)
  233.       return;
  234.     packTree_(hnode->GetLeft());
  235.     packTree_(hnode->GetRight());
  236.     if (!hnode->IsLeaf())
  237.       packedtree.push_back(true);
  238.     else
  239.       packedtree.push_back(false), packByte_(hnode->GetLetter());
  240.   }
  241. };
  242.  
  243.  
  244. class HuffmanZipper
  245. {
  246. private:
  247.   HuffmanTree *htree;
  248.   map<byte, vector<bool> > encodetable;
  249. public:
  250.   HuffmanZipper(const vector<byte> &inputstreamdata)
  251.   {
  252.     htree = new HuffmanTree(inputstreamdata);
  253.     vector<bool> path;
  254.     htree->CompleteCodeTable(encodetable, path);
  255.     if (!path.empty())
  256.       path.clear();
  257.   }
  258.  
  259.   ~HuffmanZipper()
  260.   {
  261.     delete htree;
  262.     if (!encodetable.empty())
  263.       encodetable.clear();
  264.   }
  265.  
  266.   void GetCode(vector<bool> &buffer, const byte &letter)
  267.   {
  268.     buffer.clear();
  269.     buffer = vector<bool>(encodetable[letter].begin(), encodetable[letter].end());
  270.   }
  271.  
  272.   void GetByte(const vector<bool> &code, byte &letter)
  273.   {
  274.     this->htree->GetByte(code, letter);
  275.   }
  276.  
  277.   void GetPackedTree(vector<bool> &buffer)
  278.   {
  279.     this->htree->GetPackedTree(buffer);
  280.   }
  281.  
  282.   void PutCompressedTree(vector<byte> &compresseddata)
  283.   {
  284.     vector<bool> buffer;
  285.     htree->GetPackedTree(buffer);
  286.     byte outputvalue = 0;
  287.     for (int i = 0; i < buffer.size(); i++)
  288.     {
  289.       outputvalue |= buffer[i] << (7 - (i % 8));
  290.       if (i % 8 == 7)
  291.       {
  292.         compresseddata.push_back(outputvalue);
  293.         outputvalue = 0;
  294.       }
  295.     }
  296.     if (outputvalue != 0)
  297.       compresseddata.push_back(outputvalue);
  298.     buffer.clear();
  299.   }
  300.  
  301.   void PutCompressedText(vector<byte> &compresseddata)
  302.   {
  303.     unsigned char bitindex = 0;
  304.     vector<bool> buffer;
  305.     byte outputvalue = 0;
  306.     int unusedcount = 0;
  307.     for (int i = 0; i < 8; i++)
  308.       if (!(compresseddata[compresseddata.size() - 1] & (1 << i)))
  309.         unusedcount++;
  310.       else
  311.         break;
  312.     int unusedCopy = unusedcount;
  313.     int treeSize = compresseddata.size();
  314.     compresseddata.push_back(0);
  315.     for (int i = 0; i < htree->dataforzip.size(); i++)
  316.     {
  317.       GetCode(buffer, htree->dataforzip[i]);
  318.       for (int j = 0; j < buffer.size(); j++)
  319.       {
  320.         outputvalue = (outputvalue << 1) | buffer[j];
  321.         bitindex++;
  322.         if (unusedcount != 0 && bitindex == unusedcount)
  323.         {
  324.           bitindex = 0;
  325.           for (int z = 0; z < unusedcount; z++)
  326.             compresseddata[compresseddata.size() - 1] |= outputvalue & (1 << z);
  327.           outputvalue = 0;
  328.           unusedcount = 0;
  329.         }
  330.         if (bitindex == 8)
  331.         {
  332.           bitindex = 0;
  333.           compresseddata.push_back(outputvalue);
  334.           outputvalue = 0;
  335.         }
  336.       }
  337.       buffer.clear();
  338.     }
  339.     compresseddata[treeSize - 1] |= (8 - bitindex) >> (8 - unusedCopy);
  340.     compresseddata[treeSize] |= (8 - bitindex) << unusedCopy;
  341.     if (bitindex != 0)
  342.       compresseddata.push_back(outputvalue << (8 - bitindex));
  343.   }
  344. };
  345.  
  346.  
  347. void Encode(IInputStream &original, IOutputStream &compressed)
  348. {
  349.   vector<byte> inputstreamdata;
  350.   byte inputvalue = 0;
  351.   while (original.Read(inputvalue))
  352.     inputstreamdata.push_back(inputvalue);
  353.   HuffmanZipper *hzipper = new HuffmanZipper(inputstreamdata);
  354.   inputstreamdata.clear();
  355.   vector<byte> outputstreamdata;
  356.   hzipper->PutCompressedTree(outputstreamdata);
  357.   hzipper->PutCompressedText(outputstreamdata);
  358.   for (int i = 0; i < outputstreamdata.size(); i++)
  359.     compressed.Write(outputstreamdata[i]);
  360.   delete hzipper;
  361.   outputstreamdata.clear();
  362. }
  363.  
  364.  
  365. void Decode(IInputStream &compressed, IOutputStream &original)
  366. {
  367.   unsigned int treesize = 0;
  368.   byte inputvalue = 0;
  369.   for (int i = 0; i < 4; i++)
  370.   {
  371.     compressed.Read(inputvalue);
  372.     treesize <<= 8;
  373.     treesize |= (unsigned char) inputvalue;
  374.   }
  375.   byte tmp = 0;
  376.   string s;
  377.   stack<HNode *> nodes;
  378.   vector<bool> bynaryString;
  379.   while (compressed.Read(tmp))
  380.     s.push_back(tmp);
  381.   for (int i = 0; i < s.size(); i++)
  382.   {
  383.     for (int j = 7; j >= 0; j--)
  384.     {
  385.       bynaryString.push_back((s[i] >> j) & 1);
  386.     }
  387.   }
  388.   int scannedBits = 0;
  389.   int c = 0;
  390.   for (int i = 0; i < treesize; i++)
  391.   {
  392.     if (scannedBits != 9)
  393.     {
  394.       if (bynaryString[i] && scannedBits == 0)
  395.       {
  396.         HNode *right = nodes.top();
  397.         nodes.pop();
  398.         HNode *left = nodes.top();
  399.         nodes.pop();
  400.         HNode *node = new HNode(0, 0, false, left, right);
  401.         nodes.push(node);
  402.       }
  403.       else
  404.       {
  405.         if (scannedBits != 0)
  406.         {
  407.           int D = (bynaryString[i] ? 1 : 0);
  408.           int U = 8 - scannedBits;
  409.           c |= D << U;
  410.         }
  411.         scannedBits++;
  412.       }
  413.     }
  414.     else
  415.     {
  416.       scannedBits = 0;
  417.       nodes.push(new HNode(c, 0, true, nullptr, nullptr));
  418.       c = 0;
  419.       i--;
  420.     }
  421.   }
  422.   int b = nodes.size();
  423.   HNode *treeRoot = nodes.top();
  424.   byte fakeBits = 0;
  425.   for (int i = treesize; i < treesize + 8; i++)
  426.   {
  427.     fakeBits |= bynaryString[i] << (7 - (i - treesize));
  428.   }
  429.   HNode *candidate = treeRoot;
  430.   for (int i = treesize + 8; i < bynaryString.size() - fakeBits; i++)
  431.   {
  432.     if (!bynaryString[i])
  433.     {
  434.       candidate = candidate->GetLeft();
  435.     }
  436.     else
  437.     {
  438.       candidate = candidate->GetRight();
  439.     }
  440.     if (candidate && candidate->IsLeaf())
  441.     {
  442.       original.Write(candidate->GetLetter());
  443.       candidate = treeRoot;
  444.     }
  445.   }
  446. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement