Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ///
- /// @file
- /// @author Julius Pettersson
- /// @copyright MIT/Expat License.
- /// @brief LZW archiver, naive implementation.
- ///
- /// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line archiver.
- /// It uses the simpler fixed-width code compression method.
- /// It was written with Doxygen comments.
- ///
- /// http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
- /// http://en.cppreference.com/
- /// http://www.doxygen.org/
- ///
- #include <cstdint>
- #include <cstdlib>
- #include <fstream>
- #include <ios>
- #include <iostream>
- #include <map>
- #include <ostream>
- #include <string>
- #include <vector>
- namespace {
- ///
- /// @brief Compresses the contents of `is` and writes the result to `os`.
- /// @tparam CodeType data type used for codes
- /// @param [in] is input stream
- /// @param [out] os output stream
- ///
- template <typename CodeType = uint32_t>
- void compress(std::istream &is, std::ostream &os)
- {
- std::map<std::vector<char>, CodeType> code_table {
- {{'\x00'}, 0x00}, {{'\x01'}, 0x01}, {{'\x02'}, 0x02}, {{'\x03'}, 0x03},
- {{'\x04'}, 0x04}, {{'\x05'}, 0x05}, {{'\x06'}, 0x06}, {{'\x07'}, 0x07},
- {{'\x08'}, 0x08}, {{'\x09'}, 0x09}, {{'\x0A'}, 0x0A}, {{'\x0B'}, 0x0B},
- {{'\x0C'}, 0x0C}, {{'\x0D'}, 0x0D}, {{'\x0E'}, 0x0E}, {{'\x0F'}, 0x0F},
- {{'\x10'}, 0x10}, {{'\x11'}, 0x11}, {{'\x12'}, 0x12}, {{'\x13'}, 0x13},
- {{'\x14'}, 0x14}, {{'\x15'}, 0x15}, {{'\x16'}, 0x16}, {{'\x17'}, 0x17},
- {{'\x18'}, 0x18}, {{'\x19'}, 0x19}, {{'\x1A'}, 0x1A}, {{'\x1B'}, 0x1B},
- {{'\x1C'}, 0x1C}, {{'\x1D'}, 0x1D}, {{'\x1E'}, 0x1E}, {{'\x1F'}, 0x1F},
- {{'\x20'}, 0x20}, {{'\x21'}, 0x21}, {{'\x22'}, 0x22}, {{'\x23'}, 0x23},
- {{'\x24'}, 0x24}, {{'\x25'}, 0x25}, {{'\x26'}, 0x26}, {{'\x27'}, 0x27},
- {{'\x28'}, 0x28}, {{'\x29'}, 0x29}, {{'\x2A'}, 0x2A}, {{'\x2B'}, 0x2B},
- {{'\x2C'}, 0x2C}, {{'\x2D'}, 0x2D}, {{'\x2E'}, 0x2E}, {{'\x2F'}, 0x2F},
- {{'\x30'}, 0x30}, {{'\x31'}, 0x31}, {{'\x32'}, 0x32}, {{'\x33'}, 0x33},
- {{'\x34'}, 0x34}, {{'\x35'}, 0x35}, {{'\x36'}, 0x36}, {{'\x37'}, 0x37},
- {{'\x38'}, 0x38}, {{'\x39'}, 0x39}, {{'\x3A'}, 0x3A}, {{'\x3B'}, 0x3B},
- {{'\x3C'}, 0x3C}, {{'\x3D'}, 0x3D}, {{'\x3E'}, 0x3E}, {{'\x3F'}, 0x3F},
- {{'\x40'}, 0x40}, {{'\x41'}, 0x41}, {{'\x42'}, 0x42}, {{'\x43'}, 0x43},
- {{'\x44'}, 0x44}, {{'\x45'}, 0x45}, {{'\x46'}, 0x46}, {{'\x47'}, 0x47},
- {{'\x48'}, 0x48}, {{'\x49'}, 0x49}, {{'\x4A'}, 0x4A}, {{'\x4B'}, 0x4B},
- {{'\x4C'}, 0x4C}, {{'\x4D'}, 0x4D}, {{'\x4E'}, 0x4E}, {{'\x4F'}, 0x4F},
- {{'\x50'}, 0x50}, {{'\x51'}, 0x51}, {{'\x52'}, 0x52}, {{'\x53'}, 0x53},
- {{'\x54'}, 0x54}, {{'\x55'}, 0x55}, {{'\x56'}, 0x56}, {{'\x57'}, 0x57},
- {{'\x58'}, 0x58}, {{'\x59'}, 0x59}, {{'\x5A'}, 0x5A}, {{'\x5B'}, 0x5B},
- {{'\x5C'}, 0x5C}, {{'\x5D'}, 0x5D}, {{'\x5E'}, 0x5E}, {{'\x5F'}, 0x5F},
- {{'\x60'}, 0x60}, {{'\x61'}, 0x61}, {{'\x62'}, 0x62}, {{'\x63'}, 0x63},
- {{'\x64'}, 0x64}, {{'\x65'}, 0x65}, {{'\x66'}, 0x66}, {{'\x67'}, 0x67},
- {{'\x68'}, 0x68}, {{'\x69'}, 0x69}, {{'\x6A'}, 0x6A}, {{'\x6B'}, 0x6B},
- {{'\x6C'}, 0x6C}, {{'\x6D'}, 0x6D}, {{'\x6E'}, 0x6E}, {{'\x6F'}, 0x6F},
- {{'\x70'}, 0x70}, {{'\x71'}, 0x71}, {{'\x72'}, 0x72}, {{'\x73'}, 0x73},
- {{'\x74'}, 0x74}, {{'\x75'}, 0x75}, {{'\x76'}, 0x76}, {{'\x77'}, 0x77},
- {{'\x78'}, 0x78}, {{'\x79'}, 0x79}, {{'\x7A'}, 0x7A}, {{'\x7B'}, 0x7B},
- {{'\x7C'}, 0x7C}, {{'\x7D'}, 0x7D}, {{'\x7E'}, 0x7E}, {{'\x7F'}, 0x7F},
- {{'\x80'}, 0x80}, {{'\x81'}, 0x81}, {{'\x82'}, 0x82}, {{'\x83'}, 0x83},
- {{'\x84'}, 0x84}, {{'\x85'}, 0x85}, {{'\x86'}, 0x86}, {{'\x87'}, 0x87},
- {{'\x88'}, 0x88}, {{'\x89'}, 0x89}, {{'\x8A'}, 0x8A}, {{'\x8B'}, 0x8B},
- {{'\x8C'}, 0x8C}, {{'\x8D'}, 0x8D}, {{'\x8E'}, 0x8E}, {{'\x8F'}, 0x8F},
- {{'\x90'}, 0x90}, {{'\x91'}, 0x91}, {{'\x92'}, 0x92}, {{'\x93'}, 0x93},
- {{'\x94'}, 0x94}, {{'\x95'}, 0x95}, {{'\x96'}, 0x96}, {{'\x97'}, 0x97},
- {{'\x98'}, 0x98}, {{'\x99'}, 0x99}, {{'\x9A'}, 0x9A}, {{'\x9B'}, 0x9B},
- {{'\x9C'}, 0x9C}, {{'\x9D'}, 0x9D}, {{'\x9E'}, 0x9E}, {{'\x9F'}, 0x9F},
- {{'\xA0'}, 0xA0}, {{'\xA1'}, 0xA1}, {{'\xA2'}, 0xA2}, {{'\xA3'}, 0xA3},
- {{'\xA4'}, 0xA4}, {{'\xA5'}, 0xA5}, {{'\xA6'}, 0xA6}, {{'\xA7'}, 0xA7},
- {{'\xA8'}, 0xA8}, {{'\xA9'}, 0xA9}, {{'\xAA'}, 0xAA}, {{'\xAB'}, 0xAB},
- {{'\xAC'}, 0xAC}, {{'\xAD'}, 0xAD}, {{'\xAE'}, 0xAE}, {{'\xAF'}, 0xAF},
- {{'\xB0'}, 0xB0}, {{'\xB1'}, 0xB1}, {{'\xB2'}, 0xB2}, {{'\xB3'}, 0xB3},
- {{'\xB4'}, 0xB4}, {{'\xB5'}, 0xB5}, {{'\xB6'}, 0xB6}, {{'\xB7'}, 0xB7},
- {{'\xB8'}, 0xB8}, {{'\xB9'}, 0xB9}, {{'\xBA'}, 0xBA}, {{'\xBB'}, 0xBB},
- {{'\xBC'}, 0xBC}, {{'\xBD'}, 0xBD}, {{'\xBE'}, 0xBE}, {{'\xBF'}, 0xBF},
- {{'\xC0'}, 0xC0}, {{'\xC1'}, 0xC1}, {{'\xC2'}, 0xC2}, {{'\xC3'}, 0xC3},
- {{'\xC4'}, 0xC4}, {{'\xC5'}, 0xC5}, {{'\xC6'}, 0xC6}, {{'\xC7'}, 0xC7},
- {{'\xC8'}, 0xC8}, {{'\xC9'}, 0xC9}, {{'\xCA'}, 0xCA}, {{'\xCB'}, 0xCB},
- {{'\xCC'}, 0xCC}, {{'\xCD'}, 0xCD}, {{'\xCE'}, 0xCE}, {{'\xCF'}, 0xCF},
- {{'\xD0'}, 0xD0}, {{'\xD1'}, 0xD1}, {{'\xD2'}, 0xD2}, {{'\xD3'}, 0xD3},
- {{'\xD4'}, 0xD4}, {{'\xD5'}, 0xD5}, {{'\xD6'}, 0xD6}, {{'\xD7'}, 0xD7},
- {{'\xD8'}, 0xD8}, {{'\xD9'}, 0xD9}, {{'\xDA'}, 0xDA}, {{'\xDB'}, 0xDB},
- {{'\xDC'}, 0xDC}, {{'\xDD'}, 0xDD}, {{'\xDE'}, 0xDE}, {{'\xDF'}, 0xDF},
- {{'\xE0'}, 0xE0}, {{'\xE1'}, 0xE1}, {{'\xE2'}, 0xE2}, {{'\xE3'}, 0xE3},
- {{'\xE4'}, 0xE4}, {{'\xE5'}, 0xE5}, {{'\xE6'}, 0xE6}, {{'\xE7'}, 0xE7},
- {{'\xE8'}, 0xE8}, {{'\xE9'}, 0xE9}, {{'\xEA'}, 0xEA}, {{'\xEB'}, 0xEB},
- {{'\xEC'}, 0xEC}, {{'\xED'}, 0xED}, {{'\xEE'}, 0xEE}, {{'\xEF'}, 0xEF},
- {{'\xF0'}, 0xF0}, {{'\xF1'}, 0xF1}, {{'\xF2'}, 0xF2}, {{'\xF3'}, 0xF3},
- {{'\xF4'}, 0xF4}, {{'\xF5'}, 0xF5}, {{'\xF6'}, 0xF6}, {{'\xF7'}, 0xF7},
- {{'\xF8'}, 0xF8}, {{'\xF9'}, 0xF9}, {{'\xFA'}, 0xFA}, {{'\xFB'}, 0xFB},
- {{'\xFC'}, 0xFC}, {{'\xFD'}, 0xFD}, {{'\xFE'}, 0xFE}, {{'\xFF'}, 0xFF},
- };
- char c; // Char
- is.get(c);
- std::vector<char> s{c}; // String
- while (is.get(c))
- {
- std::vector<char> s_c{s}; // String + Char
- s_c.push_back(c);
- if (code_table.count(s_c) == 0)
- {
- os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
- code_table.insert({s_c, code_table.size()});
- s = {c};
- }
- else
- s = s_c;
- }
- os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
- }
- ///
- /// @brief Decompresses the contents of `is` and writes the result to `os`.
- /// @tparam CodeType data type used for codes
- /// @param [in] is input stream
- /// @param [out] os output stream
- ///
- template <typename CodeType = uint32_t>
- void decompress(std::istream &is, std::ostream &os)
- {
- std::vector<std::vector<char>> code_table {
- {'\x00'}, {'\x01'}, {'\x02'}, {'\x03'}, {'\x04'}, {'\x05'}, {'\x06'}, {'\x07'},
- {'\x08'}, {'\x09'}, {'\x0A'}, {'\x0B'}, {'\x0C'}, {'\x0D'}, {'\x0E'}, {'\x0F'},
- {'\x10'}, {'\x11'}, {'\x12'}, {'\x13'}, {'\x14'}, {'\x15'}, {'\x16'}, {'\x17'},
- {'\x18'}, {'\x19'}, {'\x1A'}, {'\x1B'}, {'\x1C'}, {'\x1D'}, {'\x1E'}, {'\x1F'},
- {'\x20'}, {'\x21'}, {'\x22'}, {'\x23'}, {'\x24'}, {'\x25'}, {'\x26'}, {'\x27'},
- {'\x28'}, {'\x29'}, {'\x2A'}, {'\x2B'}, {'\x2C'}, {'\x2D'}, {'\x2E'}, {'\x2F'},
- {'\x30'}, {'\x31'}, {'\x32'}, {'\x33'}, {'\x34'}, {'\x35'}, {'\x36'}, {'\x37'},
- {'\x38'}, {'\x39'}, {'\x3A'}, {'\x3B'}, {'\x3C'}, {'\x3D'}, {'\x3E'}, {'\x3F'},
- {'\x40'}, {'\x41'}, {'\x42'}, {'\x43'}, {'\x44'}, {'\x45'}, {'\x46'}, {'\x47'},
- {'\x48'}, {'\x49'}, {'\x4A'}, {'\x4B'}, {'\x4C'}, {'\x4D'}, {'\x4E'}, {'\x4F'},
- {'\x50'}, {'\x51'}, {'\x52'}, {'\x53'}, {'\x54'}, {'\x55'}, {'\x56'}, {'\x57'},
- {'\x58'}, {'\x59'}, {'\x5A'}, {'\x5B'}, {'\x5C'}, {'\x5D'}, {'\x5E'}, {'\x5F'},
- {'\x60'}, {'\x61'}, {'\x62'}, {'\x63'}, {'\x64'}, {'\x65'}, {'\x66'}, {'\x67'},
- {'\x68'}, {'\x69'}, {'\x6A'}, {'\x6B'}, {'\x6C'}, {'\x6D'}, {'\x6E'}, {'\x6F'},
- {'\x70'}, {'\x71'}, {'\x72'}, {'\x73'}, {'\x74'}, {'\x75'}, {'\x76'}, {'\x77'},
- {'\x78'}, {'\x79'}, {'\x7A'}, {'\x7B'}, {'\x7C'}, {'\x7D'}, {'\x7E'}, {'\x7F'},
- {'\x80'}, {'\x81'}, {'\x82'}, {'\x83'}, {'\x84'}, {'\x85'}, {'\x86'}, {'\x87'},
- {'\x88'}, {'\x89'}, {'\x8A'}, {'\x8B'}, {'\x8C'}, {'\x8D'}, {'\x8E'}, {'\x8F'},
- {'\x90'}, {'\x91'}, {'\x92'}, {'\x93'}, {'\x94'}, {'\x95'}, {'\x96'}, {'\x97'},
- {'\x98'}, {'\x99'}, {'\x9A'}, {'\x9B'}, {'\x9C'}, {'\x9D'}, {'\x9E'}, {'\x9F'},
- {'\xA0'}, {'\xA1'}, {'\xA2'}, {'\xA3'}, {'\xA4'}, {'\xA5'}, {'\xA6'}, {'\xA7'},
- {'\xA8'}, {'\xA9'}, {'\xAA'}, {'\xAB'}, {'\xAC'}, {'\xAD'}, {'\xAE'}, {'\xAF'},
- {'\xB0'}, {'\xB1'}, {'\xB2'}, {'\xB3'}, {'\xB4'}, {'\xB5'}, {'\xB6'}, {'\xB7'},
- {'\xB8'}, {'\xB9'}, {'\xBA'}, {'\xBB'}, {'\xBC'}, {'\xBD'}, {'\xBE'}, {'\xBF'},
- {'\xC0'}, {'\xC1'}, {'\xC2'}, {'\xC3'}, {'\xC4'}, {'\xC5'}, {'\xC6'}, {'\xC7'},
- {'\xC8'}, {'\xC9'}, {'\xCA'}, {'\xCB'}, {'\xCC'}, {'\xCD'}, {'\xCE'}, {'\xCF'},
- {'\xD0'}, {'\xD1'}, {'\xD2'}, {'\xD3'}, {'\xD4'}, {'\xD5'}, {'\xD6'}, {'\xD7'},
- {'\xD8'}, {'\xD9'}, {'\xDA'}, {'\xDB'}, {'\xDC'}, {'\xDD'}, {'\xDE'}, {'\xDF'},
- {'\xE0'}, {'\xE1'}, {'\xE2'}, {'\xE3'}, {'\xE4'}, {'\xE5'}, {'\xE6'}, {'\xE7'},
- {'\xE8'}, {'\xE9'}, {'\xEA'}, {'\xEB'}, {'\xEC'}, {'\xED'}, {'\xEE'}, {'\xEF'},
- {'\xF0'}, {'\xF1'}, {'\xF2'}, {'\xF3'}, {'\xF4'}, {'\xF5'}, {'\xF6'}, {'\xF7'},
- {'\xF8'}, {'\xF9'}, {'\xFA'}, {'\xFB'}, {'\xFC'}, {'\xFD'}, {'\xFE'}, {'\xFF'}
- };
- CodeType oc; // Old Code
- CodeType nc; // New Code
- char c; // Char
- is.read(reinterpret_cast<char *> (&oc), sizeof (CodeType));
- os.write(&code_table[oc].front(), code_table[oc].size());
- while (is.read(reinterpret_cast<char *> (&nc), sizeof (CodeType)))
- {
- std::vector<char> s; // String
- if (nc >= code_table.size())
- {
- s = code_table[oc];
- s.push_back(c);
- }
- else
- s = code_table[nc];
- os.write(&s.front(), s.size());
- c = s[0];
- std::vector<char> oc_c{code_table[oc]}; // Old Code + Char
- oc_c.push_back(c);
- code_table.push_back(oc_c);
- oc = nc;
- }
- }
- ///
- /// @brief Prints usage information and a custom message.
- /// @param s custom message to be printed
- ///
- void print_usage(const std::string &s = "")
- {
- std::cerr << "\nUsage:\n";
- std::cerr << "\tprogram -flag input_file output_file\n\n";
- std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
- std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
- std::cerr << "Examples:\n";
- std::cerr << "\tlzw.exe -c license.txt license.lzw\n";
- std::cerr << "\tlzw.exe -d license.lzw new_license.txt\n";
- if (!s.empty())
- std::cerr << "\nERROR: " << s << '\n';
- std::cerr << std::endl;
- }
- } // namespace
- ///
- /// @brief Actual program entry point.
- /// @param argc number of command line arguments
- /// @param [in] argv array of command line arguments
- /// @retval EXIT_FAILURE for failed operation
- /// @retval EXIT_SUCCESS for successful operation
- ///
- int main(int argc, char *argv[])
- {
- if (argc != 4)
- {
- print_usage("Wrong number of arguments.");
- return EXIT_FAILURE;
- }
- enum class Mode {
- Compress,
- Decompress
- };
- Mode m;
- if (std::string(argv[1]) == "-c")
- m = Mode::Compress;
- else
- if (std::string(argv[1]) == "-d")
- m = Mode::Decompress;
- else
- {
- print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
- return EXIT_FAILURE;
- }
- std::ifstream input_file(argv[2], std::ios_base::binary);
- if (!input_file.is_open())
- {
- print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
- return EXIT_FAILURE;
- }
- std::ofstream output_file(argv[3], std::ios_base::binary);
- if (!output_file.is_open())
- {
- print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
- return EXIT_FAILURE;
- }
- try
- {
- input_file.exceptions(std::ios_base::badbit);
- output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
- if (m == Mode::Compress)
- compress(input_file, output_file);
- else
- if (m == Mode::Decompress)
- decompress(input_file, output_file);
- }
- catch (const std::ios_base::failure &f)
- {
- print_usage(std::string("File input/output failure: ") + f.what() + '.');
- return EXIT_FAILURE;
- }
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement