1. ///
  2. /// @file
  3. /// @author Julius Pettersson
  4. /// @copyright MIT/Expat License.
  5. /// @brief LZW file archiver
  6. /// @version 1
  7. ///
  8. /// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line archiver.
  9. /// It uses the simpler fixed-width code compression method.
  10. /// It was written with Doxygen comments.
  11. ///
  12. /// http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
  13. /// http://marknelson.us/2011/11/08/lzw-revisited/
  14. /// http://www.cs.duke.edu/csed/curious/compression/lzw.html
  15. /// http://warp.povusers.org/EfficientLZW/index.html
  16. /// http://en.cppreference.com/
  17. /// http://www.doxygen.org/
  18. ///
  19.  
  20. #include <cstdint>
  21. #include <cstdlib>
  22. #include <exception>
  23. #include <fstream>
  24. #include <ios>
  25. #include <iostream>
  26. #include <istream>
  27. #include <limits>
  28. #include <map>
  29. #include <ostream>
  30. #include <stdexcept>
  31. #include <string>
  32. #include <vector>
  33.  
  34. namespace {
  35.  
  36. typedef std::uint16_t CodeType; ///< Type used to store and retrieve codes.
  37.  
  38. namespace globals {
  39.  
  40. /// Dictionary Maximum Size (when reached, the dictionary will be reset)
  41. const CodeType dms = std::numeric_limits<CodeType>::max();
  42.  
  43. } // namespace globals
  44.  
  45. ///
  46. /// @brief Helper operator intended to simplify code.
  47. /// @param vc   original vector
  48. /// @param c    element to be appended
  49. /// @returns vector resulting from appending `c` to `vc`
  50. ///
  51. std::vector<char> operator + (std::vector<char> vc, char c)
  52. {
  53.     vc.push_back(c);
  54.     return vc;
  55. }
  56.  
  57. ///
  58. /// @brief Compresses the contents of `is` and writes the result to `os`.
  59. /// @param [in] is      input stream
  60. /// @param [out] os     output stream
  61. ///
  62. void compress(std::istream &is, std::ostream &os)
  63. {
  64.     std::map<std::vector<char>, CodeType> dictionary;
  65.  
  66.     // "named" lambda function, used to reset the dictionary to its initial contents
  67.     auto reset_dictionary = [&dictionary] {
  68.         dictionary.clear();
  69.  
  70.         char c = std::numeric_limits<char>::min();
  71.  
  72.         do
  73.         {
  74.             dictionary.insert({{c}, dictionary.size()});
  75.         }
  76.         while (c++ != std::numeric_limits<char>::max());
  77.     };
  78.  
  79. std::clog << "This gets output.\n";
  80.  
  81.     reset_dictionary();
  82.  
  83. std::clog << "This doesn't. Program stalls above.\n";
  84.  
  85.     std::vector<char> s; // String
  86.     char c;
  87.  
  88.     while (is.get(c))
  89.     {
  90.         // dictionary's maximum size was reached
  91.         if (dictionary.size() == globals::dms)
  92.             reset_dictionary();
  93.  
  94.         s.push_back(c);
  95.  
  96.         if (dictionary.count(s) == 0)
  97.         {
  98.             dictionary.insert({s, dictionary.size()});
  99.             s.pop_back();
  100.             os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
  101.             s = {c};
  102.         }
  103.     }
  104.  
  105.     if (!s.empty())
  106.         os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
  107. }
  108.  
  109. ///
  110. /// @brief Decompresses the contents of `is` and writes the result to `os`.
  111. /// @param [in] is      input stream
  112. /// @param [out] os     output stream
  113. ///
  114. void decompress(std::istream &is, std::ostream &os)
  115. {
  116.     std::vector<std::vector<char>> dictionary;
  117.  
  118.     // "named" lambda function, used to reset the dictionary to its initial contents
  119.     auto reset_dictionary = [&dictionary] {
  120.         dictionary.clear();
  121.         dictionary.reserve(globals::dms);
  122.  
  123.         char c = std::numeric_limits<char>::min();
  124.  
  125.         do
  126.         {
  127.             dictionary.push_back({c});
  128.         }
  129.         while (c++ != std::numeric_limits<char>::max());
  130.     };
  131.  
  132.     reset_dictionary();
  133.  
  134.     std::vector<char> s; // String
  135.     CodeType k; // Key
  136.  
  137.     while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
  138.     {
  139.         // dictionary's maximum size was reached
  140.         if (dictionary.size() == globals::dms)
  141.             reset_dictionary();
  142.  
  143.         if (k > dictionary.size())
  144.             throw std::runtime_error("invalid compressed code");
  145.  
  146.         if (k == dictionary.size())
  147.             dictionary.push_back(s + s.front());
  148.         else
  149.         if (!s.empty())
  150.             dictionary.push_back(s + dictionary.at(k).front());
  151.  
  152.         os.write(&dictionary.at(k).front(), dictionary.at(k).size());
  153.         s = dictionary.at(k);
  154.     }
  155. }
  156.  
  157. ///
  158. /// @brief Prints usage information and a custom error message.
  159. /// @param s    custom error message to be printed
  160. /// @param su   Show Usage information
  161. ///
  162. void print_usage(const std::string &s = "", bool su = true)
  163. {
  164.     if (!s.empty())
  165.         std::cerr << "\nERROR: " << s << '\n';
  166.  
  167.     if (su)
  168.     {
  169.         std::cerr << "\nUsage:\n";
  170.         std::cerr << "\tprogram -flag input_file output_file\n\n";
  171.         std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
  172.         std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
  173.         std::cerr << "Examples:\n";
  174.         std::cerr << "\tlzw.exe -c license.txt license.lzw\n";
  175.         std::cerr << "\tlzw.exe -d license.lzw new_license.txt\n";
  176.     }
  177.  
  178.     std::cerr << std::endl;
  179. }
  180.  
  181. } // namespace
  182.  
  183. ///
  184. /// @brief Actual program entry point.
  185. /// @param argc             number of command line arguments
  186. /// @param [in] argv        array of command line arguments
  187. /// @retval EXIT_FAILURE    for failed operation
  188. /// @retval EXIT_SUCCESS    for successful operation
  189. ///
  190. int main(int argc, char *argv[])
  191. {
  192.     if (argc != 4)
  193.     {
  194.         print_usage("Wrong number of arguments.");
  195.         return EXIT_FAILURE;
  196.     }
  197.  
  198.     enum class Mode {
  199.         Compress,
  200.         Decompress
  201.     };
  202.  
  203.     Mode m;
  204.  
  205.     if (std::string(argv[1]) == "-c")
  206.         m = Mode::Compress;
  207.     else
  208.     if (std::string(argv[1]) == "-d")
  209.         m = Mode::Decompress;
  210.     else
  211.     {
  212.         print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
  213.         return EXIT_FAILURE;
  214.     }
  215.  
  216.     std::ifstream input_file(argv[2], std::ios_base::binary);
  217.  
  218.     if (!input_file.is_open())
  219.     {
  220.         print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
  221.         return EXIT_FAILURE;
  222.     }
  223.  
  224.     std::ofstream output_file(argv[3], std::ios_base::binary);
  225.  
  226.     if (!output_file.is_open())
  227.     {
  228.         print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
  229.         return EXIT_FAILURE;
  230.     }
  231.  
  232.     try
  233.     {
  234.         input_file.exceptions(std::ios_base::badbit);
  235.         output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
  236.  
  237.         if (m == Mode::Compress)
  238.             compress(input_file, output_file);
  239.         else
  240.         if (m == Mode::Decompress)
  241.             decompress(input_file, output_file);
  242.     }
  243.     catch (const std::ios_base::failure &f)
  244.     {
  245.         print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
  246.         return EXIT_FAILURE;
  247.     }
  248.     catch (const std::exception &e)
  249.     {
  250.         print_usage(std::string("Caught exception: ") + e.what() + '.', false);
  251.         return EXIT_FAILURE;
  252.     }
  253.  
  254.     return EXIT_SUCCESS;
  255. }