Advertisement
Guest User

Decompression Suite A 0.0.1 - Fix needed for Huffman

a guest
Dec 27th, 2016
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.65 KB | None | 0 0
  1. /***
  2. The Decompression Suite
  3.  
  4. Written by Divingkatae
  5.  
  6. 666th post on Jul Edition
  7.  
  8. (c)2016, 2017 Divingkatae.
  9. ***/
  10.  
  11.  
  #include <fcntl.h>
  #include <sys/stat.h>
  #include <unistd.h>

  #include <bitset>
  #include <cstddef>
  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  #include <limits>
  #include <map>
  #include <string>
  #include <unordered_map>
  25.  
  26. int packbits_rle_decompress(char *get_infile, char *get_outfile, char *get_offset);
  27. int plain_8b_rle_decompress(char *get_infile, char *get_outfile, char *get_offset);
  28. int plain_4b_rle_decompress(char *get_infile, char *get_outfile, char *get_offset);
  29. int lzw_decompress(char *get_infile, char *get_outfile, char *get_offset);
  30. int huffman_decompress(char *get_infile, char *get_outfile, char *get_offset);
  31.  
  32. int fileLength(char* filename);
  33.  
  /**
   * One Huffman record. huffman_decompress() fills the three fields one at a
   * time from the input file, in the order they are declared here.
   */
  struct huffman_node{
      unsigned int freq;  // NOTE(review): presumably the symbol weight; a value of 0 is handled separately by the decoder
      char data;          // byte carried by this record
      bool left_right;    // branch flag; the decoder comments it as 0 = left, 1 = right
  };

  /** Convenience alias for a pointer to a huffman_node. */
  using NodePtr = huffman_node*;
  42.  
  43.  
  44. using namespace std;
  45.  
  46. int main(int argc, char *argv[])
  47. {
  48. if ( argc < 5 ){
  49. printf("An incorrect command argument was entered. Please check the documentation before continuing. \n");
  50. exit (1);
  51. }
  52. else{
  53. char *test_input = argv[1];
  54. printf("\n Compare this: %s \n", test_input);
  55. if (strcmp("packbits", test_input) == 0){
  56. printf("Time to decompress a PackBits compressed file. \n");
  57. packbits_rle_decompress(argv[2], argv[3], argv[4]);
  58. }
  59. else if (strcmp("fourbrle", test_input) == 0){
  60. printf("Time to decompress a Niblet RLE compressed file. \n");
  61. plain_4b_rle_decompress(argv[2], argv[3], argv[4]);
  62. }
  63. else if (strcmp("lzw", test_input) == 0){
  64. printf("Time to decompress an LZW compressed file. \n");
  65. lzw_decompress(argv[2], argv[3], argv[4]);
  66. }
  67. else if (strcmp("huffman", test_input) == 0){
  68. printf("Time to decompress a Huffman compressed file. \n");
  69. huffman_decompress(argv[2], argv[3], argv[4]);
  70. }
  71. else{
  72. printf("Time to decompress a generic RLE compressed file. \n");
  73. plain_8b_rle_decompress(argv[2], argv[3], argv[4]);
  74. }
  75. }
  76.  
  77. return 0;
  78. }
  79.  
  /**
   * Returns the size in bytes of the file at `filename`.
   *
   * The file is opened read-only in binary mode, positioned at its end, and the
   * resulting stream position is reported.
   *
   * @param filename Path of the file to measure.
   * @return The file size, or -1 when the file cannot be opened, the position
   *         cannot be queried, or the size does not fit in an int (previously
   *         such sizes were silently narrowed).
   */
  int fileLength(char* filename){
      std::ifstream source(filename, std::ios_base::in | std::ios_base::binary | std::ios_base::ate);
      if (!source.is_open()){
          return -1;
      }

      const std::streamoff size = source.tellg();
      const std::streamoff largest = static_cast<std::streamoff>(std::numeric_limits<int>::max());
      if (size < 0 || size > largest){
          return -1;
      }

      return static_cast<int>(size);
  }
  90.  
  /**
   * Decompresses a PackBits run-length stream from one file into another.
   *
   * Every packet starts with a header byte h:
   *   - h in 0..127   : the next h + 1 bytes are copied verbatim;
   *   - h in 129..255 : the next byte is written 257 - h times;
   *   - h == 128      : no-op, the header is skipped (PackBits defines it so).
   *
   * @param get_infile  Path of the compressed input file.
   * @param get_outfile Path of the file that receives the decompressed bytes.
   * @param get_offset  Decimal byte offset in the input where decoding starts.
   * @return 0 on success; 1 when the input ends in the middle of a packet or the
   *         output could not be written. Exits with status 1 when a file cannot
   *         be opened, the input is empty, or the offset is unusable.
   */
  int packbits_rle_decompress(char *get_infile, char *get_outfile, char *get_offset){
      printf("\n Input: %s, Output: %s, Offset: %s", get_infile, get_outfile, get_offset);

      FILE* inFile = fopen(get_infile, "rb");
      if (inFile == nullptr){
          fprintf(stderr, "\n Could not open input file %s \n", get_infile);
          exit(1);
      }

      // Measure the input through the handle that is already open.
      long get_size = -1;
      if (fseek(inFile, 0, SEEK_END) == 0){
          get_size = ftell(inFile);
      }

      // Make sure we aren't reading an empty file first.
      if (get_size < 1){
          printf("There was no data in the input file.");
          fclose(inFile);
          exit(1);
      }
      printf("\n Time to decompress %ld bytes \n", get_size);

      const long offset_a = strtol(get_offset, nullptr, 10);
      printf("\n Offset: %ld \n Now decompressing...", offset_a);
      if (offset_a < 0 || fseek(inFile, offset_a, SEEK_SET) != 0){
          fprintf(stderr, "\n Cannot start decompressing at offset %ld \n", offset_a);
          fclose(inFile);
          exit(1);
      }

      // The output is only created once the input has been validated.
      FILE* outFile = fopen(get_outfile, "wb");
      if (outFile == nullptr){
          fprintf(stderr, "\n Could not open output file %s \n", get_outfile);
          fclose(inFile);
          exit(1);
      }

      bool truncated = false;
      int header = 0;
      while (!truncated && (header = fgetc(inFile)) != EOF){
          if (header == 128){
              continue;   // no-op header: nothing to read, nothing to write
          }

          if (header > 128){
              // Run packet: one value byte repeated 257 - header times.
              const int value = fgetc(inFile);
              if (value == EOF){
                  truncated = true;
                  break;
              }
              for (int repeat = 257 - header; repeat > 0; --repeat){
                  fputc(value, outFile);
              }
          }
          else{
              // Literal packet: header + 1 bytes copied as they are.
              for (int remaining = header + 1; remaining > 0; --remaining){
                  const int literal = fgetc(inFile);
                  if (literal == EOF){
                      truncated = true;
                      break;
                  }
                  fputc(literal, outFile);
              }
          }
      }

      bool write_failed = (ferror(outFile) != 0);
      fclose(inFile);
      if (fclose(outFile) != 0){
          write_failed = true;
      }

      if (truncated){
          fprintf(stderr, "\n The input file ended in the middle of a packet. \n");
      }
      if (write_failed){
          fprintf(stderr, "\n The output file could not be written completely. \n");
      }

      return (truncated || write_failed) ? 1 : 0;
  }
  163.  
  /**
   * Decompresses a byte-oriented RLE stream from one file into another.
   *
   * The input is a sequence of (count, value) byte pairs; each pair expands to
   * `value` written count + 1 times.
   *
   * @param get_infile  Path of the compressed input file.
   * @param get_outfile Path of the file that receives the decompressed bytes.
   * @param get_offset  Decimal byte offset in the input where decoding starts.
   * @return 0 on success; 1 when the input ends with a count byte that has no
   *         value byte, or the output could not be written. Exits with status 1
   *         when a file cannot be opened, the input is empty, or the offset is
   *         unusable.
   */
  int plain_8b_rle_decompress(char *get_infile, char *get_outfile, char *get_offset){
      printf("\n Input: %s, Output: %s, Offset: %s", get_infile, get_outfile, get_offset);

      FILE* inFile = fopen(get_infile, "rb");
      if (inFile == nullptr){
          fprintf(stderr, "\n Could not open input file %s \n", get_infile);
          exit(1);
      }

      // Measure the input through the handle that is already open.
      long get_size = -1;
      if (fseek(inFile, 0, SEEK_END) == 0){
          get_size = ftell(inFile);
      }

      // Make sure we aren't reading an empty file first.
      if (get_size < 1){
          printf("There was no data in the input file.");
          fclose(inFile);
          exit(1);
      }
      printf("\n Time to decompress %ld bytes \n", get_size);

      const long offset_a = strtol(get_offset, nullptr, 10);
      printf("\n Offset: %ld \n", offset_a);
      if (offset_a < 0 || fseek(inFile, offset_a, SEEK_SET) != 0){
          fprintf(stderr, "\n Cannot start decompressing at offset %ld \n", offset_a);
          fclose(inFile);
          exit(1);
      }

      // The output is only created once the input has been validated.
      FILE* outFile = fopen(get_outfile, "wb");
      if (outFile == nullptr){
          fprintf(stderr, "\n Could not open output file %s \n", get_outfile);
          fclose(inFile);
          exit(1);
      }

      bool truncated = false;
      int count = 0;
      while ((count = fgetc(inFile)) != EOF){
          const int value = fgetc(inFile);
          if (value == EOF){
              // A count without its value byte: stop instead of repeating stale data.
              truncated = true;
              break;
          }
          for (int repeat = 0; repeat <= count; ++repeat){
              fputc(value, outFile);
          }
      }

      bool write_failed = (ferror(outFile) != 0);
      fclose(inFile);
      if (fclose(outFile) != 0){
          write_failed = true;
      }

      if (truncated){
          fprintf(stderr, "\n The input file ended in the middle of a run. \n");
      }
      if (write_failed){
          fprintf(stderr, "\n The output file could not be written completely. \n");
      }

      return (truncated || write_failed) ? 1 : 0;
  }
  221.  
  /**
   * Decompresses a nibble-oriented RLE stream from one file into another.
   *
   * Every input byte is one packet: the high nibble is a repeat count and the
   * low nibble is the value. The value (0..15) is written as a whole byte,
   * count + 1 times.
   *
   * @param get_infile  Path of the compressed input file.
   * @param get_outfile Path of the file that receives the decompressed bytes.
   * @param get_offset  Decimal byte offset in the input where decoding starts.
   * @return 0 on success; 1 when the output could not be written. Exits with
   *         status 1 when a file cannot be opened, the input is empty, or the
   *         offset is unusable.
   */
  int plain_4b_rle_decompress(char *get_infile, char *get_outfile, char *get_offset){
      printf("\n Input: %s, Output: %s, Offset: %s", get_infile, get_outfile, get_offset);

      FILE* inFile = fopen(get_infile, "rb");
      if (inFile == nullptr){
          fprintf(stderr, "\n Could not open input file %s \n", get_infile);
          exit(1);
      }

      // Measure the input through the handle that is already open.
      long get_size = -1;
      if (fseek(inFile, 0, SEEK_END) == 0){
          get_size = ftell(inFile);
      }

      // Make sure we aren't reading an empty file first.
      if (get_size < 1){
          printf("There was no data in the input file.");
          fclose(inFile);
          exit(1);
      }
      printf("\n Time to decompress %ld bytes \n", get_size);

      const long offset_a = strtol(get_offset, nullptr, 10);
      printf("\n Offset: %ld \n", offset_a);
      if (offset_a < 0 || fseek(inFile, offset_a, SEEK_SET) != 0){
          fprintf(stderr, "\n Cannot start decompressing at offset %ld \n", offset_a);
          fclose(inFile);
          exit(1);
      }

      // The output is only created once the input has been validated.
      FILE* outFile = fopen(get_outfile, "wb");
      if (outFile == nullptr){
          fprintf(stderr, "\n Could not open output file %s \n", get_outfile);
          fclose(inFile);
          exit(1);
      }

      int packet = 0;
      while ((packet = fgetc(inFile)) != EOF){
          const int run = (packet >> 4) & 0x0f;   // high nibble: repeat count
          const int value = packet & 0x0f;        // low nibble: value to emit
          for (int repeat = 0; repeat <= run; ++repeat){
              fputc(value, outFile);
          }
      }

      bool write_failed = (ferror(outFile) != 0);
      fclose(inFile);
      if (fclose(outFile) != 0){
          write_failed = true;
      }

      if (write_failed){
          fprintf(stderr, "\n The output file could not be written completely. \n");
      }

      return write_failed ? 1 : 0;
  }
  278.  
  279. /**
  280. int packbits_rle_compress(char *get_infile, char *get_outfile, char *get_offset){
  281. int offset_a = 0; //Starting offset
  282. unsigned char test_case = 0; //used for getting the test case
  283. unsigned char dump_char = 0; //dump this into a file
  284.  
  285. FILE* inFile = fopen(get_infile, "rb");
  286.  
  287. FILE* outFile = fopen(get_outfile, "wb");
  288.  
  289. //Make sure we aren't reading a nonexistant or empty file first
  290. if (fileLength(inFile) < 1){
  291. printf("There was no data in the input file.");
  292. exit(1);
  293. }
  294. else{
  295. }
  296.  
  297. return 0;
  298. }
  299. **/
  300.  
  /**
   * Decompresses an LZW stream of 16-bit codes from one file into another.
   *
   * Codes are read as raw 16-bit values in the byte order of the machine
   * running the program. Codes 0..255 stand for single bytes, new dictionary
   * entries are numbered from 257 upwards, and the dictionary stops growing
   * after code 32767.
   *
   * Fixes over the previous version: the decoded characters are written (not
   * the in-memory std::string object), decoding stops at the end of the input
   * instead of re-emitting the last code, and codes that cannot be valid are
   * rejected instead of being turned into dictionary entries.
   *
   * @param get_infile  Path of the compressed input file.
   * @param get_outfile Path of the file that receives the decompressed bytes.
   * @param get_offset  Decimal byte offset in the input where decoding starts.
   * @return 0 on success; 1 when an invalid code is met or the output could not
   *         be written. Exits with status 1 when a file cannot be opened, the
   *         input is empty, or the offset is unusable.
   */
  int lzw_decompress(char *get_infile, char *get_outfile, char *get_offset){
      const std::uint16_t max_code = 32767;     // last code the dictionary may define
      // NOTE(review): code 256 is never assigned because numbering starts at 257;
      // presumably it is the encoder's end-of-stream marker, so decoding stops
      // there. Confirm against the encoder.
      const std::uint16_t reserved_code = 256;

      printf("\n Input: %s, Output: %s, Offset: %s", get_infile, get_outfile, get_offset);

      FILE* inFile = fopen(get_infile, "rb");
      if (inFile == nullptr){
          fprintf(stderr, "\n Could not open input file %s \n", get_infile);
          exit(1);
      }

      // Measure the input through the handle that is already open.
      long get_size = -1;
      if (fseek(inFile, 0, SEEK_END) == 0){
          get_size = ftell(inFile);
      }
      if (get_size < 1){
          printf("There was no data in the input file.");
          fclose(inFile);
          exit(1);
      }

      const long offset_a = strtol(get_offset, nullptr, 10);
      if (offset_a < 0 || fseek(inFile, offset_a, SEEK_SET) != 0){
          fprintf(stderr, "\n Cannot start decompressing at offset %ld \n", offset_a);
          fclose(inFile);
          exit(1);
      }

      // The output is only created once the input has been validated.
      FILE* outFile = fopen(get_outfile, "wb");
      if (outFile == nullptr){
          fprintf(stderr, "\n Could not open output file %s \n", get_outfile);
          fclose(inFile);
          exit(1);
      }

      // Fill the dictionary with the 256 single-character strings.
      std::unordered_map<std::uint16_t, std::string> dictionary((max_code * 11) / 10);
      for (int value = 0; value < 256; ++value){
          dictionary[static_cast<std::uint16_t>(value)] = std::string(1, static_cast<char>(value));
      }

      std::uint16_t next_code = 257;
      std::string previous;          // string decoded from the preceding code
      bool corrupt = false;
      std::uint16_t code = 0;

      while (fread(&code, sizeof(code), 1, inFile) == 1){
          if (code == reserved_code){
              break;
          }

          std::string entry;
          const auto known = dictionary.find(code);
          if (known != dictionary.end()){
              entry = known->second;
          }
          else if (code == next_code && next_code <= max_code && !previous.empty()){
              // The one legal unknown code: the entry that is about to be
              // defined, i.e. the previous string plus its own first character.
              entry = previous + previous[0];
          }
          else{
              corrupt = true;
              break;
          }

          fwrite(entry.data(), 1, entry.size(), outFile);

          if (!previous.empty() && next_code <= max_code){
              dictionary[next_code++] = previous + entry[0];
          }
          previous.swap(entry);
      }

      bool write_failed = (ferror(outFile) != 0);
      fclose(inFile);
      if (fclose(outFile) != 0){
          write_failed = true;
      }

      if (corrupt){
          fprintf(stderr, "\n The input file contains an invalid LZW code (%u). \n", static_cast<unsigned int>(code));
      }
      if (write_failed){
          fprintf(stderr, "\n The output file could not be written completely. \n");
      }

      return (corrupt || write_failed) ? 1 : 0;
  }
  366.  
  // Work-in-progress Huffman decompressor (see the TO DO list below).
  //
  // Reads huffman_node fields (freq, data, left_right) from the input file and
  // appends characters to heap buffers that are written to the output file.
  //
  // Parameters:
  //   get_infile  - path of the input file, opened with "rb"
  //   get_outfile - path of the output file, opened with "wb"
  //   get_offset  - decimal string; parsed with atoi() as the loop's start offset
  // Returns 0 when it returns; calls exit(1) if fileLength() reports a size below 1.
  //
  // NOTE(review): as written this function has undefined behaviour in several
  // places (each is marked below) and the input format cannot be determined
  // from this file, so it must not be relied on until it is reworked.
  367. int huffman_decompress(char *get_infile, char *get_outfile, char *get_offset){
  368. //TO DO list:
  369. //Proper memory allocation
  370. //Bitwise IO (since Huffman does bit level compression)
  371.  
  372. int offset_a = 0; //Starting offset
  // NOTE(review): the third member is the bool left_right, so NULL ends up as false here.
  373. huffman_node base_node = {0, 0, NULL}; //Root node
  // NOTE(review): used_bits is never read or written after this line.
  374. unsigned char used_bits = 0;
  375. unsigned char dump_char = 0; //dump this into a file
  376. //int test_arg3_length = 0; //where to begin searching in the file
  377.  
  378. printf("\n Input: %s, Output: %s, Offset: %s", get_infile, get_outfile, get_offset);
  379.  
  // NOTE(review): neither fopen() result is checked before the handles are used.
  380. FILE* inFile = fopen(get_infile, "rb");
  381.  
  382. FILE* outFile = fopen(get_outfile, "wb");
  383.  
  384. //Get the offset to begin the decompression
  385.  
  386. int get_size = fileLength(get_infile);
  387.  
  // The first node is read from the start of the file (offset_a is still 0 here,
  // get_offset has not been applied yet), one field per fread().
  // NOTE(review): this happens before the empty-file check further down.
  388. //An unrolled version of the data saving code
  389. fseek(inFile, offset_a, SEEK_SET);
  390. fread(&base_node.freq,1,sizeof(unsigned int),inFile);
  391. offset_a += sizeof(unsigned int);
  392.  
  393. fseek(inFile, offset_a, SEEK_SET);
  394. fread(&base_node.data,1,sizeof(char),inFile);
  395. offset_a += sizeof(char);
  396.  
  397. fseek(inFile, offset_a, SEEK_SET);
  398. fread(&base_node.left_right,1,sizeof(bool),inFile);
  399. offset_a += sizeof(bool);
  400.  
  401. fseek(inFile, offset_a, SEEK_SET);
  402.  
  403. //Make sure we aren't reading a nonexistent or empty file first
  404. if (get_size < 1){
  405. printf("There was no data in the input file.");
  406. exit(1);
  407. }
  408. else{
  409.  
  410. printf("\n Time to decompress %d bytes \n", get_size);
  411.  
  412. //test_arg3_length = (unsigned)strlen(get_offset);
  413.  
  // This overwrites the offset accumulated while reading the first node above.
  414. offset_a = atoi(get_offset);
  415.  
  // NOTE(review): store_chars_here is never used.
  416. std::map <char*, char> store_chars_here;
  417.  
  418. //Try to not make a size limitation immediately
  419.  
  // NOTE(review): malloc() leaves these buffers uninitialised, yet they are later
  // passed to strcat(), which expects NUL-terminated strings. The results are not
  // checked for NULL, and result_array is never freed.
  420. char * temp_array = (char*) malloc(sizeof(char) * get_size);
  421. char * result_array = (char*) malloc(sizeof(char) * get_size);
  422. char * new_char_array = (char*) malloc(sizeof(char) * get_size);
  423.  
  424. printf("\n Offset: %d \n", offset_a);
  425.  
  426. while (offset_a < get_size)
  427. {
  428. fseek (inFile, (offset_a), SEEK_SET );
  // NOTE(review): &new_char_array is the address of the pointer variable, so the
  // byte read lands in the pointer itself, not in the buffer it points to.
  429. fread(&new_char_array,1,1,inFile);
  430.  
  // NOTE(review): result_array is a char*, so the sizeof expression is the size
  // of a pointer, not the buffer length; fwrite() therefore reads that many
  // bytes starting at the single byte dump_char (which is always 0 here).
  431. fwrite(&dump_char, 1, (sizeof(result_array) / sizeof(result_array[0])), outFile);
  432.  
  // NOTE(review): after this assignment the buffer temp_array pointed to is no
  // longer reachable, and the strcat() below has the same buffer as source and
  // destination.
  433. temp_array = new_char_array;
  434. strcat(temp_array, new_char_array);
  435. const char * get_result;
  436.  
  437. if (base_node.freq > 0){
  // NOTE(review): the two branches below execute the same statements.
  // In both, get_result points at the single char base_node.data (not a
  // NUL-terminated string), and that char is overwritten by the fread() calls
  // before strcat() reads through get_result.
  438. //0 = left, 1 = right
  439. if(base_node.left_right == 0) {
  440. get_result = &(base_node.data);
  441.  
  442. fseek(inFile, offset_a, SEEK_SET);
  443. fread(&base_node.freq,1,sizeof(unsigned int),inFile);
  444. offset_a += sizeof(unsigned int);
  445.  
  446. fseek(inFile, offset_a, SEEK_SET);
  447. fread(&base_node.data,1,sizeof(char),inFile);
  448. offset_a += sizeof(char);
  449.  
  450. fseek(inFile, offset_a, SEEK_SET);
  451. fread(&base_node.left_right,1,sizeof(bool),inFile);
  452. offset_a += sizeof(bool);
  453.  
  454. fseek (inFile, offset_a, SEEK_SET );
  455. strcat(result_array, get_result);
  456. strcat(temp_array, result_array);
  457. }
  458. else if(base_node.left_right == 1) {
  459. get_result = &(base_node.data);
  460.  
  461. fseek(inFile, offset_a, SEEK_SET);
  462. fread(&base_node.freq,1,sizeof(unsigned int),inFile);
  463. offset_a += sizeof(unsigned int);
  464.  
  465. fseek(inFile, offset_a, SEEK_SET);
  466. fread(&base_node.data,1,sizeof(char),inFile);
  467. offset_a += sizeof(char);
  468.  
  469. fseek(inFile, offset_a, SEEK_SET);
  470. fread(&base_node.left_right,1,sizeof(bool),inFile);
  471. offset_a += sizeof(bool);
  472.  
  473. fseek (inFile, (offset_a), SEEK_SET );
  474. strcat(result_array, get_result);
  475. strcat(temp_array, result_array);
  476. }
  477. }
  478. else {
  479. get_result = &(base_node.data);
  480. strcat(result_array, get_result);
  // NOTE(review): same pointer-size count as the fwrite() near the top of the loop.
  481. fwrite(result_array, 1, (sizeof(result_array) / sizeof(result_array[0])), outFile);
  // NOTE(review): temp_array aliases new_char_array at this point, so this frees
  // the buffer new_char_array still refers to; later iterations keep using it
  // and a second pass through this branch frees it again.
  482. free(temp_array);
  483. temp_array = nullptr;
  484. }
  // The offset moves one more byte on top of any node fields consumed above.
  485. offset_a++;
  486. }
  487.  
  488.  
  489. fclose(inFile);
  490. fclose(outFile);
  491. }
  492.  
  493. return 0;
  494. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement