///
/// @file
/// @author Julius Pettersson
/// @copyright MIT/Expat License.
/// @brief LZW archiver, naive implementation.
///
/// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line archiver.
/// It uses the simpler fixed-width code compression method.
/// It was written with Doxygen comments.
///
/// http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
/// http://en.cppreference.com/
/// http://www.doxygen.org/
///
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <ios>
#include <iostream>
#include <map>
#include <ostream>
#include <string>
#include <vector>
namespace {
///
/// @brief Compresses the contents of `is` and writes the result to `os`.
/// @tparam CodeType data type used for codes
/// @param [in] is input stream
/// @param [out] os output stream
///
template <typename CodeType = uint32_t>
void compress(std::istream &is, std::ostream &os)
{
std::map<std::vector<char>, CodeType> code_table {
{{'\x00'}, 0x00}, {{'\x01'}, 0x01}, {{'\x02'}, 0x02}, {{'\x03'}, 0x03},
{{'\x04'}, 0x04}, {{'\x05'}, 0x05}, {{'\x06'}, 0x06}, {{'\x07'}, 0x07},
{{'\x08'}, 0x08}, {{'\x09'}, 0x09}, {{'\x0A'}, 0x0A}, {{'\x0B'}, 0x0B},
{{'\x0C'}, 0x0C}, {{'\x0D'}, 0x0D}, {{'\x0E'}, 0x0E}, {{'\x0F'}, 0x0F},
{{'\x10'}, 0x10}, {{'\x11'}, 0x11}, {{'\x12'}, 0x12}, {{'\x13'}, 0x13},
{{'\x14'}, 0x14}, {{'\x15'}, 0x15}, {{'\x16'}, 0x16}, {{'\x17'}, 0x17},
{{'\x18'}, 0x18}, {{'\x19'}, 0x19}, {{'\x1A'}, 0x1A}, {{'\x1B'}, 0x1B},
{{'\x1C'}, 0x1C}, {{'\x1D'}, 0x1D}, {{'\x1E'}, 0x1E}, {{'\x1F'}, 0x1F},
{{'\x20'}, 0x20}, {{'\x21'}, 0x21}, {{'\x22'}, 0x22}, {{'\x23'}, 0x23},
{{'\x24'}, 0x24}, {{'\x25'}, 0x25}, {{'\x26'}, 0x26}, {{'\x27'}, 0x27},
{{'\x28'}, 0x28}, {{'\x29'}, 0x29}, {{'\x2A'}, 0x2A}, {{'\x2B'}, 0x2B},
{{'\x2C'}, 0x2C}, {{'\x2D'}, 0x2D}, {{'\x2E'}, 0x2E}, {{'\x2F'}, 0x2F},
{{'\x30'}, 0x30}, {{'\x31'}, 0x31}, {{'\x32'}, 0x32}, {{'\x33'}, 0x33},
{{'\x34'}, 0x34}, {{'\x35'}, 0x35}, {{'\x36'}, 0x36}, {{'\x37'}, 0x37},
{{'\x38'}, 0x38}, {{'\x39'}, 0x39}, {{'\x3A'}, 0x3A}, {{'\x3B'}, 0x3B},
{{'\x3C'}, 0x3C}, {{'\x3D'}, 0x3D}, {{'\x3E'}, 0x3E}, {{'\x3F'}, 0x3F},
{{'\x40'}, 0x40}, {{'\x41'}, 0x41}, {{'\x42'}, 0x42}, {{'\x43'}, 0x43},
{{'\x44'}, 0x44}, {{'\x45'}, 0x45}, {{'\x46'}, 0x46}, {{'\x47'}, 0x47},
{{'\x48'}, 0x48}, {{'\x49'}, 0x49}, {{'\x4A'}, 0x4A}, {{'\x4B'}, 0x4B},
{{'\x4C'}, 0x4C}, {{'\x4D'}, 0x4D}, {{'\x4E'}, 0x4E}, {{'\x4F'}, 0x4F},
{{'\x50'}, 0x50}, {{'\x51'}, 0x51}, {{'\x52'}, 0x52}, {{'\x53'}, 0x53},
{{'\x54'}, 0x54}, {{'\x55'}, 0x55}, {{'\x56'}, 0x56}, {{'\x57'}, 0x57},
{{'\x58'}, 0x58}, {{'\x59'}, 0x59}, {{'\x5A'}, 0x5A}, {{'\x5B'}, 0x5B},
{{'\x5C'}, 0x5C}, {{'\x5D'}, 0x5D}, {{'\x5E'}, 0x5E}, {{'\x5F'}, 0x5F},
{{'\x60'}, 0x60}, {{'\x61'}, 0x61}, {{'\x62'}, 0x62}, {{'\x63'}, 0x63},
{{'\x64'}, 0x64}, {{'\x65'}, 0x65}, {{'\x66'}, 0x66}, {{'\x67'}, 0x67},
{{'\x68'}, 0x68}, {{'\x69'}, 0x69}, {{'\x6A'}, 0x6A}, {{'\x6B'}, 0x6B},
{{'\x6C'}, 0x6C}, {{'\x6D'}, 0x6D}, {{'\x6E'}, 0x6E}, {{'\x6F'}, 0x6F},
{{'\x70'}, 0x70}, {{'\x71'}, 0x71}, {{'\x72'}, 0x72}, {{'\x73'}, 0x73},
{{'\x74'}, 0x74}, {{'\x75'}, 0x75}, {{'\x76'}, 0x76}, {{'\x77'}, 0x77},
{{'\x78'}, 0x78}, {{'\x79'}, 0x79}, {{'\x7A'}, 0x7A}, {{'\x7B'}, 0x7B},
{{'\x7C'}, 0x7C}, {{'\x7D'}, 0x7D}, {{'\x7E'}, 0x7E}, {{'\x7F'}, 0x7F},
{{'\x80'}, 0x80}, {{'\x81'}, 0x81}, {{'\x82'}, 0x82}, {{'\x83'}, 0x83},
{{'\x84'}, 0x84}, {{'\x85'}, 0x85}, {{'\x86'}, 0x86}, {{'\x87'}, 0x87},
{{'\x88'}, 0x88}, {{'\x89'}, 0x89}, {{'\x8A'}, 0x8A}, {{'\x8B'}, 0x8B},
{{'\x8C'}, 0x8C}, {{'\x8D'}, 0x8D}, {{'\x8E'}, 0x8E}, {{'\x8F'}, 0x8F},
{{'\x90'}, 0x90}, {{'\x91'}, 0x91}, {{'\x92'}, 0x92}, {{'\x93'}, 0x93},
{{'\x94'}, 0x94}, {{'\x95'}, 0x95}, {{'\x96'}, 0x96}, {{'\x97'}, 0x97},
{{'\x98'}, 0x98}, {{'\x99'}, 0x99}, {{'\x9A'}, 0x9A}, {{'\x9B'}, 0x9B},
{{'\x9C'}, 0x9C}, {{'\x9D'}, 0x9D}, {{'\x9E'}, 0x9E}, {{'\x9F'}, 0x9F},
{{'\xA0'}, 0xA0}, {{'\xA1'}, 0xA1}, {{'\xA2'}, 0xA2}, {{'\xA3'}, 0xA3},
{{'\xA4'}, 0xA4}, {{'\xA5'}, 0xA5}, {{'\xA6'}, 0xA6}, {{'\xA7'}, 0xA7},
{{'\xA8'}, 0xA8}, {{'\xA9'}, 0xA9}, {{'\xAA'}, 0xAA}, {{'\xAB'}, 0xAB},
{{'\xAC'}, 0xAC}, {{'\xAD'}, 0xAD}, {{'\xAE'}, 0xAE}, {{'\xAF'}, 0xAF},
{{'\xB0'}, 0xB0}, {{'\xB1'}, 0xB1}, {{'\xB2'}, 0xB2}, {{'\xB3'}, 0xB3},
{{'\xB4'}, 0xB4}, {{'\xB5'}, 0xB5}, {{'\xB6'}, 0xB6}, {{'\xB7'}, 0xB7},
{{'\xB8'}, 0xB8}, {{'\xB9'}, 0xB9}, {{'\xBA'}, 0xBA}, {{'\xBB'}, 0xBB},
{{'\xBC'}, 0xBC}, {{'\xBD'}, 0xBD}, {{'\xBE'}, 0xBE}, {{'\xBF'}, 0xBF},
{{'\xC0'}, 0xC0}, {{'\xC1'}, 0xC1}, {{'\xC2'}, 0xC2}, {{'\xC3'}, 0xC3},
{{'\xC4'}, 0xC4}, {{'\xC5'}, 0xC5}, {{'\xC6'}, 0xC6}, {{'\xC7'}, 0xC7},
{{'\xC8'}, 0xC8}, {{'\xC9'}, 0xC9}, {{'\xCA'}, 0xCA}, {{'\xCB'}, 0xCB},
{{'\xCC'}, 0xCC}, {{'\xCD'}, 0xCD}, {{'\xCE'}, 0xCE}, {{'\xCF'}, 0xCF},
{{'\xD0'}, 0xD0}, {{'\xD1'}, 0xD1}, {{'\xD2'}, 0xD2}, {{'\xD3'}, 0xD3},
{{'\xD4'}, 0xD4}, {{'\xD5'}, 0xD5}, {{'\xD6'}, 0xD6}, {{'\xD7'}, 0xD7},
{{'\xD8'}, 0xD8}, {{'\xD9'}, 0xD9}, {{'\xDA'}, 0xDA}, {{'\xDB'}, 0xDB},
{{'\xDC'}, 0xDC}, {{'\xDD'}, 0xDD}, {{'\xDE'}, 0xDE}, {{'\xDF'}, 0xDF},
{{'\xE0'}, 0xE0}, {{'\xE1'}, 0xE1}, {{'\xE2'}, 0xE2}, {{'\xE3'}, 0xE3},
{{'\xE4'}, 0xE4}, {{'\xE5'}, 0xE5}, {{'\xE6'}, 0xE6}, {{'\xE7'}, 0xE7},
{{'\xE8'}, 0xE8}, {{'\xE9'}, 0xE9}, {{'\xEA'}, 0xEA}, {{'\xEB'}, 0xEB},
{{'\xEC'}, 0xEC}, {{'\xED'}, 0xED}, {{'\xEE'}, 0xEE}, {{'\xEF'}, 0xEF},
{{'\xF0'}, 0xF0}, {{'\xF1'}, 0xF1}, {{'\xF2'}, 0xF2}, {{'\xF3'}, 0xF3},
{{'\xF4'}, 0xF4}, {{'\xF5'}, 0xF5}, {{'\xF6'}, 0xF6}, {{'\xF7'}, 0xF7},
{{'\xF8'}, 0xF8}, {{'\xF9'}, 0xF9}, {{'\xFA'}, 0xFA}, {{'\xFB'}, 0xFB},
{{'\xFC'}, 0xFC}, {{'\xFD'}, 0xFD}, {{'\xFE'}, 0xFE}, {{'\xFF'}, 0xFF},
};
char c; // Char
is.get(c);
std::vector<char> s{c}; // String
while (is.get(c))
{
std::vector<char> s_c{s}; // String + Char
s_c.push_back(c);
if (code_table.count(s_c) == 0)
{
os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
code_table.insert({s_c, code_table.size()});
s = {c};
}
else
s = s_c;
}
os.write(reinterpret_cast<const char *> (&code_table[s]), sizeof (CodeType));
}
///
/// @brief Decompresses the contents of `is` and writes the result to `os`.
/// @tparam CodeType data type used for codes
/// @param [in] is input stream
/// @param [out] os output stream
///
template <typename CodeType = uint32_t>
void decompress(std::istream &is, std::ostream &os)
{
std::vector<std::vector<char>> code_table {
{'\x00'}, {'\x01'}, {'\x02'}, {'\x03'}, {'\x04'}, {'\x05'}, {'\x06'}, {'\x07'},
{'\x08'}, {'\x09'}, {'\x0A'}, {'\x0B'}, {'\x0C'}, {'\x0D'}, {'\x0E'}, {'\x0F'},
{'\x10'}, {'\x11'}, {'\x12'}, {'\x13'}, {'\x14'}, {'\x15'}, {'\x16'}, {'\x17'},
{'\x18'}, {'\x19'}, {'\x1A'}, {'\x1B'}, {'\x1C'}, {'\x1D'}, {'\x1E'}, {'\x1F'},
{'\x20'}, {'\x21'}, {'\x22'}, {'\x23'}, {'\x24'}, {'\x25'}, {'\x26'}, {'\x27'},
{'\x28'}, {'\x29'}, {'\x2A'}, {'\x2B'}, {'\x2C'}, {'\x2D'}, {'\x2E'}, {'\x2F'},
{'\x30'}, {'\x31'}, {'\x32'}, {'\x33'}, {'\x34'}, {'\x35'}, {'\x36'}, {'\x37'},
{'\x38'}, {'\x39'}, {'\x3A'}, {'\x3B'}, {'\x3C'}, {'\x3D'}, {'\x3E'}, {'\x3F'},
{'\x40'}, {'\x41'}, {'\x42'}, {'\x43'}, {'\x44'}, {'\x45'}, {'\x46'}, {'\x47'},
{'\x48'}, {'\x49'}, {'\x4A'}, {'\x4B'}, {'\x4C'}, {'\x4D'}, {'\x4E'}, {'\x4F'},
{'\x50'}, {'\x51'}, {'\x52'}, {'\x53'}, {'\x54'}, {'\x55'}, {'\x56'}, {'\x57'},
{'\x58'}, {'\x59'}, {'\x5A'}, {'\x5B'}, {'\x5C'}, {'\x5D'}, {'\x5E'}, {'\x5F'},
{'\x60'}, {'\x61'}, {'\x62'}, {'\x63'}, {'\x64'}, {'\x65'}, {'\x66'}, {'\x67'},
{'\x68'}, {'\x69'}, {'\x6A'}, {'\x6B'}, {'\x6C'}, {'\x6D'}, {'\x6E'}, {'\x6F'},
{'\x70'}, {'\x71'}, {'\x72'}, {'\x73'}, {'\x74'}, {'\x75'}, {'\x76'}, {'\x77'},
{'\x78'}, {'\x79'}, {'\x7A'}, {'\x7B'}, {'\x7C'}, {'\x7D'}, {'\x7E'}, {'\x7F'},
{'\x80'}, {'\x81'}, {'\x82'}, {'\x83'}, {'\x84'}, {'\x85'}, {'\x86'}, {'\x87'},
{'\x88'}, {'\x89'}, {'\x8A'}, {'\x8B'}, {'\x8C'}, {'\x8D'}, {'\x8E'}, {'\x8F'},
{'\x90'}, {'\x91'}, {'\x92'}, {'\x93'}, {'\x94'}, {'\x95'}, {'\x96'}, {'\x97'},
{'\x98'}, {'\x99'}, {'\x9A'}, {'\x9B'}, {'\x9C'}, {'\x9D'}, {'\x9E'}, {'\x9F'},
{'\xA0'}, {'\xA1'}, {'\xA2'}, {'\xA3'}, {'\xA4'}, {'\xA5'}, {'\xA6'}, {'\xA7'},
{'\xA8'}, {'\xA9'}, {'\xAA'}, {'\xAB'}, {'\xAC'}, {'\xAD'}, {'\xAE'}, {'\xAF'},
{'\xB0'}, {'\xB1'}, {'\xB2'}, {'\xB3'}, {'\xB4'}, {'\xB5'}, {'\xB6'}, {'\xB7'},
{'\xB8'}, {'\xB9'}, {'\xBA'}, {'\xBB'}, {'\xBC'}, {'\xBD'}, {'\xBE'}, {'\xBF'},
{'\xC0'}, {'\xC1'}, {'\xC2'}, {'\xC3'}, {'\xC4'}, {'\xC5'}, {'\xC6'}, {'\xC7'},
{'\xC8'}, {'\xC9'}, {'\xCA'}, {'\xCB'}, {'\xCC'}, {'\xCD'}, {'\xCE'}, {'\xCF'},
{'\xD0'}, {'\xD1'}, {'\xD2'}, {'\xD3'}, {'\xD4'}, {'\xD5'}, {'\xD6'}, {'\xD7'},
{'\xD8'}, {'\xD9'}, {'\xDA'}, {'\xDB'}, {'\xDC'}, {'\xDD'}, {'\xDE'}, {'\xDF'},
{'\xE0'}, {'\xE1'}, {'\xE2'}, {'\xE3'}, {'\xE4'}, {'\xE5'}, {'\xE6'}, {'\xE7'},
{'\xE8'}, {'\xE9'}, {'\xEA'}, {'\xEB'}, {'\xEC'}, {'\xED'}, {'\xEE'}, {'\xEF'},
{'\xF0'}, {'\xF1'}, {'\xF2'}, {'\xF3'}, {'\xF4'}, {'\xF5'}, {'\xF6'}, {'\xF7'},
{'\xF8'}, {'\xF9'}, {'\xFA'}, {'\xFB'}, {'\xFC'}, {'\xFD'}, {'\xFE'}, {'\xFF'}
};
CodeType oc; // Old Code
CodeType nc; // New Code
char c; // Char
is.read(reinterpret_cast<char *> (&oc), sizeof (CodeType));
os.write(&code_table[oc].front(), code_table[oc].size());
while (is.read(reinterpret_cast<char *> (&nc), sizeof (CodeType)))
{
std::vector<char> s; // String
if (nc >= code_table.size())
{
s = code_table[oc];
s.push_back(c);
}
else
s = code_table[nc];
os.write(&s.front(), s.size());
c = s[0];
std::vector<char> oc_c{code_table[oc]}; // Old Code + Char
oc_c.push_back(c);
code_table.push_back(oc_c);
oc = nc;
}
}
///
/// @brief Prints usage information and a custom message.
/// @param s custom message to be printed
///
void print_usage(const std::string &s = "")
{
std::cerr << "\nUsage:\n";
std::cerr << "\tprogram -flag input_file output_file\n\n";
std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
std::cerr << "Examples:\n";
std::cerr << "\tlzw.exe -c license.txt license.lzw\n";
std::cerr << "\tlzw.exe -d license.lzw new_license.txt\n";
if (!s.empty())
std::cerr << "\nERROR: " << s << '\n';
std::cerr << std::endl;
}
} // namespace
///
/// @brief Actual program entry point.
/// @param argc number of command line arguments
/// @param [in] argv array of command line arguments
/// @retval EXIT_FAILURE for failed operation
/// @retval EXIT_SUCCESS for successful operation
///
int main(int argc, char *argv[])
{
if (argc != 4)
{
print_usage("Wrong number of arguments.");
return EXIT_FAILURE;
}
enum class Mode {
Compress,
Decompress
};
Mode m;
if (std::string(argv[1]) == "-c")
m = Mode::Compress;
else
if (std::string(argv[1]) == "-d")
m = Mode::Decompress;
else
{
print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
return EXIT_FAILURE;
}
std::ifstream input_file(argv[2], std::ios_base::binary);
if (!input_file.is_open())
{
print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
return EXIT_FAILURE;
}
std::ofstream output_file(argv[3], std::ios_base::binary);
if (!output_file.is_open())
{
print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
return EXIT_FAILURE;
}
try
{
input_file.exceptions(std::ios_base::badbit);
output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
if (m == Mode::Compress)
compress(input_file, output_file);
else
if (m == Mode::Decompress)
decompress(input_file, output_file);
}
catch (const std::ios_base::failure &f)
{
print_usage(std::string("File input/output failure: ") + f.what() + '.');
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}