///
/// @file
/// @author Julius Pettersson
/// @copyright MIT/Expat License.
/// @brief LZW archiver, naive implementation.
///
/// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line archiver.
/// It uses the simpler fixed-width code compression method.
/// It was written with Doxygen comments.
///
/// http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
/// http://en.cppreference.com/
/// http://www.doxygen.org/
///

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <ios>
#include <iostream>
#include <istream>
#include <limits>
#include <map>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

namespace {

///
/// @brief Number of single-byte strings every code table starts with.
///
constexpr int initial_entries = 256;

///
/// @brief Tells whether the next table entry can still be numbered by `CodeType`.
/// @tparam CodeType    data type used for codes
/// @param used         number of entries currently in the code table
/// @return `true` if the value `used` is representable as a `CodeType`
///
/// Both `compress()` and `decompress()` stop growing their tables with this same
/// test, so the two tables stay in step once the code space is exhausted.
///
template <typename CodeType>
bool has_free_code(const std::size_t used)
{
    return used <= static_cast<std::uintmax_t>(std::numeric_limits<CodeType>::max());
}

///
/// @brief Writes one code to `os` as `sizeof (CodeType)` raw bytes.
/// @tparam CodeType    data type used for codes
/// @param [out] os     output stream
/// @param code         code to be written, in the host's object representation
///
template <typename CodeType>
void write_code(std::ostream &os, const CodeType code)
{
    os.write(reinterpret_cast<const char *> (&code), sizeof (CodeType));
}

///
/// @brief Reads one code from `is` as `sizeof (CodeType)` raw bytes.
/// @tparam CodeType    data type used for codes
/// @param [in] is      input stream
/// @param [out] code   receives the code that was read
/// @return `true` if a whole code was read, `false` otherwise
///
template <typename CodeType>
bool read_code(std::istream &is, CodeType &code)
{
    return static_cast<bool>(is.read(reinterpret_cast<char *> (&code), sizeof (CodeType)));
}

///
/// @brief Converts a code read from an archive into a table index.
/// @tparam CodeType    data type used for codes
/// @param code         code read from the archive
/// @param limit        largest index that is acceptable at this point
/// @return `code` as a `std::size_t`
/// @throws std::ios_base::failure  if `code` is larger than `limit`
///
template <typename CodeType>
std::size_t checked_index(const CodeType code, const std::size_t limit)
{
    const std::uintmax_t wide = static_cast<std::uintmax_t>(code);

    if (wide > limit)
        throw std::ios_base::failure("invalid code in compressed data");

    return static_cast<std::size_t>(wide);
}

///
/// @brief Compresses the contents of `is` and writes the result to `os`.
/// @tparam CodeType    data type used for codes
/// @param [in] is      input stream
/// @param [out] os     output stream
///
/// Each code is written as `sizeof (CodeType)` raw bytes, so an archive can only
/// be read back with the same `CodeType` and the same object representation.
/// An empty input produces an empty output.
///
template <typename CodeType>
void compress(std::istream &is, std::ostream &os)
{
    // The table starts with one entry per byte value; the code is the byte value.
    std::map<std::vector<char>, CodeType> code_table;

    for (int byte = 0; byte < initial_entries; ++byte)
        code_table.emplace(
            std::vector<char>(1, static_cast<char>(static_cast<unsigned char>(byte))),
            static_cast<CodeType>(byte));

    char c; // Char

    // Nothing to encode: do not emit a code for a byte that was never read.
    if (!is.get(c))
        return;

    std::vector<char> s(1, c);                  // String
    CodeType code = code_table.find(s)->second; // Code of `s`

    while (is.get(c))
    {
        s.push_back(c); // `s` now holds String + Char

        const auto known = code_table.find(s);

        if (known != code_table.end())
        {
            // Still a known string: keep extending it.
            code = known->second;
            continue;
        }

        // String + Char is new: emit the code of String, then remember String + Char.
        write_code(os, code);

        // Only number the new entry while the number still fits in CodeType.
        if (has_free_code<CodeType>(code_table.size()))
            code_table.emplace(std::move(s), static_cast<CodeType>(code_table.size()));

        s.assign(1, c);
        code = code_table.find(s)->second; // single-byte strings are always present
    }

    write_code(os, code);
}

///
/// @brief Decompresses the contents of `is` and writes the result to `os`.
/// @tparam CodeType    data type used for codes
/// @param [in] is      input stream
/// @param [out] os     output stream
/// @throws std::ios_base::failure  if a code cannot refer to any table entry
///
/// `CodeType` must be the type the archive was compressed with.
/// An empty input produces an empty output. Trailing bytes that do not make up
/// a whole code are ignored.
///
template <typename CodeType>
void decompress(std::istream &is, std::ostream &os)
{
    // Mirror of the compressor's initial table: index == byte value.
    std::vector<std::vector<char>> code_table;

    code_table.reserve(initial_entries);

    for (int byte = 0; byte < initial_entries; ++byte)
        code_table.push_back(
            std::vector<char>(1, static_cast<char>(static_cast<unsigned char>(byte))));

    CodeType code;

    // Empty archive: there is no first code to decode.
    if (!read_code(is, code))
        return;

    // The first code can only name one of the initial single-byte entries.
    std::size_t oc = checked_index(code, code_table.size() - 1); // Old Code

    os.write(code_table[oc].data(), static_cast<std::streamsize>(code_table[oc].size()));

    while (read_code(is, code))
    {
        // At most one entry ahead of the table is legal (see below).
        const std::size_t nc = checked_index(code, code_table.size()); // New Code
        std::vector<char> s; // String

        if (nc == code_table.size())
        {
            // The compressor used the entry it had just created, which this side
            // has not built yet. That entry is always Old String + its own first byte.
            s = code_table[oc];
            s.push_back(s.front());
        }
        else
            s = code_table[nc];

        os.write(s.data(), static_cast<std::streamsize>(s.size()));

        // Rebuild the entry the compressor added after emitting Old Code.
        if (has_free_code<CodeType>(code_table.size()))
        {
            std::vector<char> oc_c(code_table[oc]); // Old Code + Char

            oc_c.push_back(s.front());
            code_table.push_back(std::move(oc_c));
        }

        oc = nc;
    }
}

///
/// @brief Prints usage information and a custom message.
/// @param s    custom message to be printed
///
/// Everything is written to `std::cerr`; the message is skipped when `s` is empty.
///
void print_usage(const std::string &s = "")
{
    std::cerr << "\nUsage:\n";
    std::cerr << "\tprogram -flag input_file output_file\n\n";
    std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
    std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
    std::cerr << "Examples:\n";
    std::cerr << "\tlzw.exe -c license.txt license.lzw\n";
    std::cerr << "\tlzw.exe -d license.lzw new_license.txt\n";

    if (!s.empty())
        std::cerr << "\nERROR: " << s << '\n';

    std::cerr << std::endl;
}

} // namespace

///
/// @brief Actual program entry point.
/// @param argc number of command line arguments /// @param [in] argv array of command line arguments /// @retval EXIT_FAILURE for failed operation /// @retval EXIT_SUCCESS for successful operation /// int main(int argc, char *argv[]) { if (argc != 4) { print_usage("Wrong number of arguments."); return EXIT_FAILURE; } enum class Mode { Compress, Decompress }; Mode m; if (std::string(argv[1]) == "-c") m = Mode::Compress; else if (std::string(argv[1]) == "-d") m = Mode::Decompress; else { print_usage(std::string("flag `") + argv[1] + "' is not recognized."); return EXIT_FAILURE; } std::ifstream input_file(argv[2], std::ios_base::binary); if (!input_file.is_open()) { print_usage(std::string("input_file `") + argv[2] + "' could not be opened."); return EXIT_FAILURE; } std::ofstream output_file(argv[3], std::ios_base::binary); if (!output_file.is_open()) { print_usage(std::string("output_file `") + argv[3] + "' could not be opened."); return EXIT_FAILURE; } try { input_file.exceptions(std::ios_base::badbit); output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit); if (m == Mode::Compress) compress(input_file, output_file); else if (m == Mode::Decompress) decompress(input_file, output_file); } catch (const std::ios_base::failure &f) { print_usage(std::string("File input/output failure: ") + f.what() + '.'); return EXIT_FAILURE; } return EXIT_SUCCESS; }