Advertisement
Guest User

LZW toy archiver

a guest
Feb 7th, 2013
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 12.96 KB | None | 0 0
  1. ///
  2. /// @file
  3. /// @author Julius Pettersson
  4. /// @copyright MIT/Expat License.
  5. /// @brief LZW archiver, naive implementation.
  6. ///
  7. /// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line archiver.
  8. /// It uses the simpler fixed-width code compression method.
  9. /// It was written with Doxygen comments.
  10. ///
  11. /// http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
  12. /// http://en.cppreference.com/
  13. /// http://www.doxygen.org/
  14. ///
  15.  
  16. #include <cstdint>
  17. #include <cstdlib>
  18. #include <fstream>
  19. #include <ios>
  20. #include <iostream>
  21. #include <map>
  22. #include <ostream>
  23. #include <string>
  24. #include <vector>
  25.  
  26. namespace {
  27.  
  28. ///
  29. /// @brief Compresses the contents of `is` and writes the result to `os`.
  30. /// @tparam CodeType    data type used for codes
  31. /// @param [in] is      input stream
  32. /// @param [out] os     output stream
  33. ///
  34. template <typename CodeType = uint32_t>
  35. void compress(std::istream &is, std::ostream &os)
  36. {
  37.     std::map<std::vector<char>, CodeType> code_table {
  38.         {{'\x00'}, 0x00}, {{'\x01'}, 0x01}, {{'\x02'}, 0x02}, {{'\x03'}, 0x03},
  39.         {{'\x04'}, 0x04}, {{'\x05'}, 0x05}, {{'\x06'}, 0x06}, {{'\x07'}, 0x07},
  40.         {{'\x08'}, 0x08}, {{'\x09'}, 0x09}, {{'\x0A'}, 0x0A}, {{'\x0B'}, 0x0B},
  41.         {{'\x0C'}, 0x0C}, {{'\x0D'}, 0x0D}, {{'\x0E'}, 0x0E}, {{'\x0F'}, 0x0F},
  42.         {{'\x10'}, 0x10}, {{'\x11'}, 0x11}, {{'\x12'}, 0x12}, {{'\x13'}, 0x13},
  43.         {{'\x14'}, 0x14}, {{'\x15'}, 0x15}, {{'\x16'}, 0x16}, {{'\x17'}, 0x17},
  44.         {{'\x18'}, 0x18}, {{'\x19'}, 0x19}, {{'\x1A'}, 0x1A}, {{'\x1B'}, 0x1B},
  45.         {{'\x1C'}, 0x1C}, {{'\x1D'}, 0x1D}, {{'\x1E'}, 0x1E}, {{'\x1F'}, 0x1F},
  46.         {{'\x20'}, 0x20}, {{'\x21'}, 0x21}, {{'\x22'}, 0x22}, {{'\x23'}, 0x23},
  47.         {{'\x24'}, 0x24}, {{'\x25'}, 0x25}, {{'\x26'}, 0x26}, {{'\x27'}, 0x27},
  48.         {{'\x28'}, 0x28}, {{'\x29'}, 0x29}, {{'\x2A'}, 0x2A}, {{'\x2B'}, 0x2B},
  49.         {{'\x2C'}, 0x2C}, {{'\x2D'}, 0x2D}, {{'\x2E'}, 0x2E}, {{'\x2F'}, 0x2F},
  50.         {{'\x30'}, 0x30}, {{'\x31'}, 0x31}, {{'\x32'}, 0x32}, {{'\x33'}, 0x33},
  51.         {{'\x34'}, 0x34}, {{'\x35'}, 0x35}, {{'\x36'}, 0x36}, {{'\x37'}, 0x37},
  52.         {{'\x38'}, 0x38}, {{'\x39'}, 0x39}, {{'\x3A'}, 0x3A}, {{'\x3B'}, 0x3B},
  53.         {{'\x3C'}, 0x3C}, {{'\x3D'}, 0x3D}, {{'\x3E'}, 0x3E}, {{'\x3F'}, 0x3F},
  54.         {{'\x40'}, 0x40}, {{'\x41'}, 0x41}, {{'\x42'}, 0x42}, {{'\x43'}, 0x43},
  55.         {{'\x44'}, 0x44}, {{'\x45'}, 0x45}, {{'\x46'}, 0x46}, {{'\x47'}, 0x47},
  56.         {{'\x48'}, 0x48}, {{'\x49'}, 0x49}, {{'\x4A'}, 0x4A}, {{'\x4B'}, 0x4B},
  57.         {{'\x4C'}, 0x4C}, {{'\x4D'}, 0x4D}, {{'\x4E'}, 0x4E}, {{'\x4F'}, 0x4F},
  58.         {{'\x50'}, 0x50}, {{'\x51'}, 0x51}, {{'\x52'}, 0x52}, {{'\x53'}, 0x53},
  59.         {{'\x54'}, 0x54}, {{'\x55'}, 0x55}, {{'\x56'}, 0x56}, {{'\x57'}, 0x57},
  60.         {{'\x58'}, 0x58}, {{'\x59'}, 0x59}, {{'\x5A'}, 0x5A}, {{'\x5B'}, 0x5B},
  61.         {{'\x5C'}, 0x5C}, {{'\x5D'}, 0x5D}, {{'\x5E'}, 0x5E}, {{'\x5F'}, 0x5F},
  62.         {{'\x60'}, 0x60}, {{'\x61'}, 0x61}, {{'\x62'}, 0x62}, {{'\x63'}, 0x63},
  63.         {{'\x64'}, 0x64}, {{'\x65'}, 0x65}, {{'\x66'}, 0x66}, {{'\x67'}, 0x67},
  64.         {{'\x68'}, 0x68}, {{'\x69'}, 0x69}, {{'\x6A'}, 0x6A}, {{'\x6B'}, 0x6B},
  65.         {{'\x6C'}, 0x6C}, {{'\x6D'}, 0x6D}, {{'\x6E'}, 0x6E}, {{'\x6F'}, 0x6F},
  66.         {{'\x70'}, 0x70}, {{'\x71'}, 0x71}, {{'\x72'}, 0x72}, {{'\x73'}, 0x73},
  67.         {{'\x74'}, 0x74}, {{'\x75'}, 0x75}, {{'\x76'}, 0x76}, {{'\x77'}, 0x77},
  68.         {{'\x78'}, 0x78}, {{'\x79'}, 0x79}, {{'\x7A'}, 0x7A}, {{'\x7B'}, 0x7B},
  69.         {{'\x7C'}, 0x7C}, {{'\x7D'}, 0x7D}, {{'\x7E'}, 0x7E}, {{'\x7F'}, 0x7F},
  70.         {{'\x80'}, 0x80}, {{'\x81'}, 0x81}, {{'\x82'}, 0x82}, {{'\x83'}, 0x83},
  71.         {{'\x84'}, 0x84}, {{'\x85'}, 0x85}, {{'\x86'}, 0x86}, {{'\x87'}, 0x87},
  72.         {{'\x88'}, 0x88}, {{'\x89'}, 0x89}, {{'\x8A'}, 0x8A}, {{'\x8B'}, 0x8B},
  73.         {{'\x8C'}, 0x8C}, {{'\x8D'}, 0x8D}, {{'\x8E'}, 0x8E}, {{'\x8F'}, 0x8F},
  74.         {{'\x90'}, 0x90}, {{'\x91'}, 0x91}, {{'\x92'}, 0x92}, {{'\x93'}, 0x93},
  75.         {{'\x94'}, 0x94}, {{'\x95'}, 0x95}, {{'\x96'}, 0x96}, {{'\x97'}, 0x97},
  76.         {{'\x98'}, 0x98}, {{'\x99'}, 0x99}, {{'\x9A'}, 0x9A}, {{'\x9B'}, 0x9B},
  77.         {{'\x9C'}, 0x9C}, {{'\x9D'}, 0x9D}, {{'\x9E'}, 0x9E}, {{'\x9F'}, 0x9F},
  78.         {{'\xA0'}, 0xA0}, {{'\xA1'}, 0xA1}, {{'\xA2'}, 0xA2}, {{'\xA3'}, 0xA3},
  79.         {{'\xA4'}, 0xA4}, {{'\xA5'}, 0xA5}, {{'\xA6'}, 0xA6}, {{'\xA7'}, 0xA7},
  80.         {{'\xA8'}, 0xA8}, {{'\xA9'}, 0xA9}, {{'\xAA'}, 0xAA}, {{'\xAB'}, 0xAB},
  81.         {{'\xAC'}, 0xAC}, {{'\xAD'}, 0xAD}, {{'\xAE'}, 0xAE}, {{'\xAF'}, 0xAF},
  82.         {{'\xB0'}, 0xB0}, {{'\xB1'}, 0xB1}, {{'\xB2'}, 0xB2}, {{'\xB3'}, 0xB3},
  83.         {{'\xB4'}, 0xB4}, {{'\xB5'}, 0xB5}, {{'\xB6'}, 0xB6}, {{'\xB7'}, 0xB7},
  84.         {{'\xB8'}, 0xB8}, {{'\xB9'}, 0xB9}, {{'\xBA'}, 0xBA}, {{'\xBB'}, 0xBB},
  85.         {{'\xBC'}, 0xBC}, {{'\xBD'}, 0xBD}, {{'\xBE'}, 0xBE}, {{'\xBF'}, 0xBF},
  86.         {{'\xC0'}, 0xC0}, {{'\xC1'}, 0xC1}, {{'\xC2'}, 0xC2}, {{'\xC3'}, 0xC3},
  87.         {{'\xC4'}, 0xC4}, {{'\xC5'}, 0xC5}, {{'\xC6'}, 0xC6}, {{'\xC7'}, 0xC7},
  88.         {{'\xC8'}, 0xC8}, {{'\xC9'}, 0xC9}, {{'\xCA'}, 0xCA}, {{'\xCB'}, 0xCB},
  89.         {{'\xCC'}, 0xCC}, {{'\xCD'}, 0xCD}, {{'\xCE'}, 0xCE}, {{'\xCF'}, 0xCF},
  90.         {{'\xD0'}, 0xD0}, {{'\xD1'}, 0xD1}, {{'\xD2'}, 0xD2}, {{'\xD3'}, 0xD3},
  91.         {{'\xD4'}, 0xD4}, {{'\xD5'}, 0xD5}, {{'\xD6'}, 0xD6}, {{'\xD7'}, 0xD7},
  92.         {{'\xD8'}, 0xD8}, {{'\xD9'}, 0xD9}, {{'\xDA'}, 0xDA}, {{'\xDB'}, 0xDB},
  93.         {{'\xDC'}, 0xDC}, {{'\xDD'}, 0xDD}, {{'\xDE'}, 0xDE}, {{'\xDF'}, 0xDF},
  94.         {{'\xE0'}, 0xE0}, {{'\xE1'}, 0xE1}, {{'\xE2'}, 0xE2}, {{'\xE3'}, 0xE3},
  95.         {{'\xE4'}, 0xE4}, {{'\xE5'}, 0xE5}, {{'\xE6'}, 0xE6}, {{'\xE7'}, 0xE7},
  96.         {{'\xE8'}, 0xE8}, {{'\xE9'}, 0xE9}, {{'\xEA'}, 0xEA}, {{'\xEB'}, 0xEB},
  97.         {{'\xEC'}, 0xEC}, {{'\xED'}, 0xED}, {{'\xEE'}, 0xEE}, {{'\xEF'}, 0xEF},
  98.         {{'\xF0'}, 0xF0}, {{'\xF1'}, 0xF1}, {{'\xF2'}, 0xF2}, {{'\xF3'}, 0xF3},
  99.         {{'\xF4'}, 0xF4}, {{'\xF5'}, 0xF5}, {{'\xF6'}, 0xF6}, {{'\xF7'}, 0xF7},
  100.         {{'\xF8'}, 0xF8}, {{'\xF9'}, 0xF9}, {{'\xFA'}, 0xFA}, {{'\xFB'}, 0xFB},
  101.         {{'\xFC'}, 0xFC}, {{'\xFD'}, 0xFD}, {{'\xFE'}, 0xFE}, {{'\xFF'}, 0xFF},
  102.     };
  103.  
  104.     char c; // Char
  105.  
  106.     is.get(c);
  107.  
  108.     std::vector<char> s{c}; // String
  109.  
  110.     while (is.get(c))
  111.     {
  112.         std::vector<char> s_c{s}; // String + Char
  113.  
  114.         s_c.push_back(c);
  115.  
  116.         if (code_table.count(s_c) == 0)
  117.         {
  118.             os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
  119.             code_table.insert({s_c, code_table.size()});
  120.             s = {c};
  121.         }
  122.         else
  123.             s = s_c;
  124.     }
  125.  
  126.     os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
  127. }
  128.  
  129. ///
  130. /// @brief Decompresses the contents of `is` and writes the result to `os`.
  131. /// @tparam CodeType    data type used for codes
  132. /// @param [in] is      input stream
  133. /// @param [out] os     output stream
  134. ///
  135. template <typename CodeType = uint32_t>
  136. void decompress(std::istream &is, std::ostream &os)
  137. {
  138.     std::vector<std::vector<char>> code_table {
  139.         {'\x00'}, {'\x01'}, {'\x02'}, {'\x03'}, {'\x04'}, {'\x05'}, {'\x06'}, {'\x07'},
  140.         {'\x08'}, {'\x09'}, {'\x0A'}, {'\x0B'}, {'\x0C'}, {'\x0D'}, {'\x0E'}, {'\x0F'},
  141.         {'\x10'}, {'\x11'}, {'\x12'}, {'\x13'}, {'\x14'}, {'\x15'}, {'\x16'}, {'\x17'},
  142.         {'\x18'}, {'\x19'}, {'\x1A'}, {'\x1B'}, {'\x1C'}, {'\x1D'}, {'\x1E'}, {'\x1F'},
  143.         {'\x20'}, {'\x21'}, {'\x22'}, {'\x23'}, {'\x24'}, {'\x25'}, {'\x26'}, {'\x27'},
  144.         {'\x28'}, {'\x29'}, {'\x2A'}, {'\x2B'}, {'\x2C'}, {'\x2D'}, {'\x2E'}, {'\x2F'},
  145.         {'\x30'}, {'\x31'}, {'\x32'}, {'\x33'}, {'\x34'}, {'\x35'}, {'\x36'}, {'\x37'},
  146.         {'\x38'}, {'\x39'}, {'\x3A'}, {'\x3B'}, {'\x3C'}, {'\x3D'}, {'\x3E'}, {'\x3F'},
  147.         {'\x40'}, {'\x41'}, {'\x42'}, {'\x43'}, {'\x44'}, {'\x45'}, {'\x46'}, {'\x47'},
  148.         {'\x48'}, {'\x49'}, {'\x4A'}, {'\x4B'}, {'\x4C'}, {'\x4D'}, {'\x4E'}, {'\x4F'},
  149.         {'\x50'}, {'\x51'}, {'\x52'}, {'\x53'}, {'\x54'}, {'\x55'}, {'\x56'}, {'\x57'},
  150.         {'\x58'}, {'\x59'}, {'\x5A'}, {'\x5B'}, {'\x5C'}, {'\x5D'}, {'\x5E'}, {'\x5F'},
  151.         {'\x60'}, {'\x61'}, {'\x62'}, {'\x63'}, {'\x64'}, {'\x65'}, {'\x66'}, {'\x67'},
  152.         {'\x68'}, {'\x69'}, {'\x6A'}, {'\x6B'}, {'\x6C'}, {'\x6D'}, {'\x6E'}, {'\x6F'},
  153.         {'\x70'}, {'\x71'}, {'\x72'}, {'\x73'}, {'\x74'}, {'\x75'}, {'\x76'}, {'\x77'},
  154.         {'\x78'}, {'\x79'}, {'\x7A'}, {'\x7B'}, {'\x7C'}, {'\x7D'}, {'\x7E'}, {'\x7F'},
  155.         {'\x80'}, {'\x81'}, {'\x82'}, {'\x83'}, {'\x84'}, {'\x85'}, {'\x86'}, {'\x87'},
  156.         {'\x88'}, {'\x89'}, {'\x8A'}, {'\x8B'}, {'\x8C'}, {'\x8D'}, {'\x8E'}, {'\x8F'},
  157.         {'\x90'}, {'\x91'}, {'\x92'}, {'\x93'}, {'\x94'}, {'\x95'}, {'\x96'}, {'\x97'},
  158.         {'\x98'}, {'\x99'}, {'\x9A'}, {'\x9B'}, {'\x9C'}, {'\x9D'}, {'\x9E'}, {'\x9F'},
  159.         {'\xA0'}, {'\xA1'}, {'\xA2'}, {'\xA3'}, {'\xA4'}, {'\xA5'}, {'\xA6'}, {'\xA7'},
  160.         {'\xA8'}, {'\xA9'}, {'\xAA'}, {'\xAB'}, {'\xAC'}, {'\xAD'}, {'\xAE'}, {'\xAF'},
  161.         {'\xB0'}, {'\xB1'}, {'\xB2'}, {'\xB3'}, {'\xB4'}, {'\xB5'}, {'\xB6'}, {'\xB7'},
  162.         {'\xB8'}, {'\xB9'}, {'\xBA'}, {'\xBB'}, {'\xBC'}, {'\xBD'}, {'\xBE'}, {'\xBF'},
  163.         {'\xC0'}, {'\xC1'}, {'\xC2'}, {'\xC3'}, {'\xC4'}, {'\xC5'}, {'\xC6'}, {'\xC7'},
  164.         {'\xC8'}, {'\xC9'}, {'\xCA'}, {'\xCB'}, {'\xCC'}, {'\xCD'}, {'\xCE'}, {'\xCF'},
  165.         {'\xD0'}, {'\xD1'}, {'\xD2'}, {'\xD3'}, {'\xD4'}, {'\xD5'}, {'\xD6'}, {'\xD7'},
  166.         {'\xD8'}, {'\xD9'}, {'\xDA'}, {'\xDB'}, {'\xDC'}, {'\xDD'}, {'\xDE'}, {'\xDF'},
  167.         {'\xE0'}, {'\xE1'}, {'\xE2'}, {'\xE3'}, {'\xE4'}, {'\xE5'}, {'\xE6'}, {'\xE7'},
  168.         {'\xE8'}, {'\xE9'}, {'\xEA'}, {'\xEB'}, {'\xEC'}, {'\xED'}, {'\xEE'}, {'\xEF'},
  169.         {'\xF0'}, {'\xF1'}, {'\xF2'}, {'\xF3'}, {'\xF4'}, {'\xF5'}, {'\xF6'}, {'\xF7'},
  170.         {'\xF8'}, {'\xF9'}, {'\xFA'}, {'\xFB'}, {'\xFC'}, {'\xFD'}, {'\xFE'}, {'\xFF'}
  171.     };
  172.  
  173.     CodeType    oc; // Old Code
  174.     CodeType    nc; // New Code
  175.     char        c;  // Char
  176.  
  177.     is.read(reinterpret_cast<char *> (&oc), sizeof (CodeType));
  178.     os.write(&code_table[oc].front(), code_table[oc].size());
  179.  
  180.     while (is.read(reinterpret_cast<char *> (&nc), sizeof (CodeType)))
  181.     {
  182.         std::vector<char> s; // String
  183.  
  184.         if (nc >= code_table.size())
  185.         {
  186.             s = code_table[oc];
  187.             s.push_back(c);
  188.         }
  189.         else
  190.             s = code_table[nc];
  191.  
  192.         os.write(&s.front(), s.size());
  193.         c = s[0];
  194.  
  195.         std::vector<char> oc_c{code_table[oc]}; // Old Code + Char
  196.  
  197.         oc_c.push_back(c);
  198.         code_table.push_back(oc_c);
  199.         oc = nc;
  200.     }
  201. }
  202.  
  203. ///
  204. /// @brief Prints usage information and a custom message.
  205. /// @param s    custom message to be printed
  206. ///
  207. void print_usage(const std::string &s = "")
  208. {
  209.     std::cerr << "\nUsage:\n";
  210.     std::cerr << "\tprogram -flag input_file output_file\n\n";
  211.     std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
  212.     std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
  213.     std::cerr << "Examples:\n";
  214.     std::cerr << "\tlzw.exe -c license.txt license.lzw\n";
  215.     std::cerr << "\tlzw.exe -d license.lzw new_license.txt\n";
  216.  
  217.     if (!s.empty())
  218.         std::cerr << "\nERROR: " << s << '\n';
  219.  
  220.     std::cerr << std::endl;
  221. }
  222.  
  223. } // namespace
  224.  
  225. ///
  226. /// @brief Actual program entry point.
  227. /// @param argc             number of command line arguments
  228. /// @param [in] argv        array of command line arguments
  229. /// @retval EXIT_FAILURE    for failed operation
  230. /// @retval EXIT_SUCCESS    for successful operation
  231. ///
  232. int main(int argc, char *argv[])
  233. {
  234.     if (argc != 4)
  235.     {
  236.         print_usage("Wrong number of arguments.");
  237.         return EXIT_FAILURE;
  238.     }
  239.  
  240.     enum class Mode {
  241.         Compress,
  242.         Decompress
  243.     };
  244.  
  245.     Mode m;
  246.  
  247.     if (std::string(argv[1]) == "-c")
  248.         m = Mode::Compress;
  249.     else
  250.     if (std::string(argv[1]) == "-d")
  251.         m = Mode::Decompress;
  252.     else
  253.     {
  254.         print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
  255.         return EXIT_FAILURE;
  256.     }
  257.  
  258.     std::ifstream input_file(argv[2], std::ios_base::binary);
  259.  
  260.     if (!input_file.is_open())
  261.     {
  262.         print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
  263.         return EXIT_FAILURE;
  264.     }
  265.  
  266.     std::ofstream output_file(argv[3], std::ios_base::binary);
  267.  
  268.     if (!output_file.is_open())
  269.     {
  270.         print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
  271.         return EXIT_FAILURE;
  272.     }
  273.  
  274.     try
  275.     {
  276.         input_file.exceptions(std::ios_base::badbit);
  277.         output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
  278.  
  279.         if (m == Mode::Compress)
  280.             compress(input_file, output_file);
  281.         else
  282.         if (m == Mode::Decompress)
  283.             decompress(input_file, output_file);
  284.     }
  285.     catch (const std::ios_base::failure &f)
  286.     {
  287.         print_usage(std::string("File input/output failure: ") + f.what() + '.');
  288.         return EXIT_FAILURE;
  289.     }
  290.  
  291.     return EXIT_SUCCESS;
  292. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement