Advertisement
Guest User

es conas

a guest
Nov 20th, 2017
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.81 KB | None | 0 0
  1. #include <iostream>
  2.  
  3. #include "gzip.h"
  4. #include "huffman.h"
  5.  
  6. FILE* gzFile;
  7.  
  8. long getOrigFileSize(FILE * gzFile);
  9. int isDynamicHuffman(unsigned char rb);
  10. void bits2String(char *strBits, unsigned char byte);
  11. int getHuffmanCodes(int necessaryBits, int exp);
  12. void lengthLengthAlfa();
  13. void lengthCodes(int lengths []);
  14.  
  15. unsigned char byte; //vari�vel tempor�ria para armazenar um byte lido directamente do ficheiro
  16. unsigned int rb = 0; //�ltimo byte lido (poder� ter mais que 8 bits, se tiverem sobrado alguns de leituras anteriores)
  17. char availBits = 0;
  18. int hLit, hDist, hcLen;
  19. HuffmanTree* tree=NULL;
  20.  
  21. int main() {
  22. char oneChar;
  23. long fileSize;
  24. long origFileSize;
  25. int numBlocks = 0;
  26. char needBits = 0;
  27. gzipHeader gzh;
  28.  
  29.  
  30.  
  31. gzFile = fopen("C:\\Users\\Alvineca\\CLionProjects\\ex2\\FAQ.txt.gz", "rb");
  32. if (!gzFile) {
  33. perror("C:\\Users\\Alvineca\\CLionProjects\\ex2\\FAQ.txt.gz"); exit(EXIT_FAILURE);
  34. }
  35.  
  36. fseek(gzFile, 0L, SEEK_END);
  37. fileSize = ftell(gzFile);
  38. fseek(gzFile, 0L, SEEK_SET);
  39.  
  40. //ler tamanho do ficheiro original (acrescentar: e definir Vector com s�mbolos
  41. origFileSize = getOrigFileSize(gzFile);
  42.  
  43. //--- ler cabe�alho
  44. int erro = getHeader(gzFile, &gzh);
  45. if (erro != 0)
  46. {
  47. printf ("Formato inválido!!!");
  48. return -1;
  49. }
  50.  
  51. //--- Para todos os blocos encontrados
  52. char BFINAL;
  53.  
  54. do
  55. {
  56. //--- ler o block header: primeiro byte depois do cabe�alho do ficheiro
  57. needBits = 3;
  58. if (availBits < needBits)
  59. {
  60. fread(&byte, 1, 1, gzFile);
  61. rb = (byte << availBits) | rb;
  62. availBits += 8;
  63. }
  64.  
  65. //obter BFINAL
  66. //ver se � o �ltimo bloco
  67. BFINAL = rb & 0x01; //primeiro bit � o menos significativo
  68. rb = rb >> 1; //descartar o bit correspondente ao BFINAL
  69. availBits -=1;
  70.  
  71. //analisar block header e ver se � huffman din�mico
  72. if (!isDynamicHuffman(rb)) //ignorar bloco se n�o for Huffman din�mico
  73. continue;
  74. rb = rb >> 2; //descartar os 2 bits correspondentes ao BTYPE
  75. availBits -= 2;
  76.  
  77. //--- Se chegou aqui --> compactado com Huffman din�mico --> descompactar
  78.  
  79. hLit = getHuffmanCodes(5,0x1F);
  80. hDist = getHuffmanCodes(5,0x1F);
  81. hcLen = getHuffmanCodes(4,0x0F);
  82. printf("\nValores dos codigos:\n____________________________________\n| HLIT: %d | HDIST: %d | HCLEN: %d |\n|__________|___________|___________|\n\n\n", hLit, hDist, hcLen);
  83.  
  84. lengthLengthAlfa();
  85.  
  86. //actualizar n�mero de blocos analisados
  87.  
  88. numBlocks++;
  89. }while(BFINAL == 0);
  90.  
  91. //terminações
  92. fclose(gzFile);
  93. printf("End: %d bloco(s) analisado(s).\n", numBlocks);
  94.  
  95.  
  96. //teste da função bits2String: RETIRAR antes de criar o executável final
  97. char str[9];
  98. bits2String(str, 0x03);
  99. printf("%s\n", str);
  100.  
  101.  
  102. //RETIRAR antes de criar o executável final-
  103. system("PAUSE");
  104. return EXIT_SUCCESS;
  105. }
  106.  
  107. int getHuffmanCodes(int necessaryBits, int exp){
  108. int code = 0;
  109. while (availBits < necessaryBits){
  110. fread(&byte, 1, 1, gzFile);
  111. rb = byte << availBits | rb;
  112. availBits += 8;
  113. }
  114.  
  115. code = rb & exp; //retirar últimos len(exp) bits
  116. rb = rb >> necessaryBits;
  117. availBits -= necessaryBits;
  118.  
  119. return code;
  120. }
  121.  
  122. void lengthLengthAlfa(){
  123.  
  124. int sequence [] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
  125. int length = 0;
  126. int codeLength []={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
  127.  
  128. hcLen+=4;
  129.  
  130. printf("Comprimento dos codigos do 'alfabeto de comprimentos de codigos':\n\n");
  131.  
  132. for(int i = 0 ;i < hcLen ; i++){
  133. length = getHuffmanCodes(3,0x07); //0x07- 3 bits menos significativos
  134. codeLength[sequence[i]] = length;
  135. printf("index: %d -> comprimento: %d\n",sequence[i], codeLength[sequence[i]]);
  136. }
  137.  
  138. lengthCodes(codeLength);
  139.  
  140. }
  141.  
  142. void lengthCodes(int lengths []){
  143.  
  144. int codes []={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
  145. int maxBits=0;
  146.  
  147. for(int i=0; i<hcLen; i++){
  148. if(maxBits<lengths[i]){
  149. maxBits=lengths[i];
  150. }
  151. }
  152. printf("MaxBits= %d\n", maxBits);
  153. char bl_count [maxBits+1];
  154.  
  155. for(int i=1; i<=maxBits; i++) {
  156. int count=0;
  157. for (int j = 0; j < 19; j++) {
  158. if(lengths[j]==i){
  159. count++;
  160. }
  161. }
  162. bl_count[i]=count;
  163. printf("bl_count[%d]=%d\n", i, bl_count[i]);
  164. }
  165.  
  166. int code=0;
  167.  
  168. bl_count[0]=0; // bl_count[maxBits+1]
  169.  
  170. int firstCode[maxBits];
  171.  
  172. for(int bits=1; bits<=maxBits; bits++){
  173. code = (code + bl_count[bits-1]) << 1;
  174. firstCode[bits] = code; // primeiro codigo de cada comprimento
  175. printf("firstCode[%d] = %d\n", bits, firstCode[bits]);
  176. }
  177.  
  178.  
  179.  
  180. for(int i=1; i<=maxBits; i++) {
  181. int n = firstCode[i];
  182. for (int j = 0; j < 19; j++) {
  183. if(lengths[j]==i){
  184. codes[j] = n;
  185. n++;
  186. }
  187.  
  188. }
  189. }
  190.  
  191. for(int i=0; i<19; i++){
  192. printf("codes[%d]= %d\n", i, codes[i]);
  193. }
  194.  
  195. char binario [19];
  196.  
  197. tree=createHFTree();
  198.  
  199. printf("VER ISTO--------------------------------------");
  200. for (int i=0; i<19; i++){
  201. if(lengths[i]!=0){
  202. char s [lengths[i]];
  203. int temp=codes[i];
  204. int bin;
  205. printf("\n\ncodigo %d: ", codes[i]);
  206. for(int j=0; j<lengths[i];j++) {
  207. bin = temp % 2;
  208. temp = temp / 2;
  209. if (bin == 1) {
  210. s[lengths[i] - j] = '1';
  211. printf("1");
  212. }
  213. else {
  214. s[lengths[i] - j] = '0';
  215. printf("0");
  216. }
  217. }
  218. printf("\n\n");
  219. }
  220. }
  221.  
  222. }
  223.  
  // Tells whether a block uses dynamic Huffman coding.
  //   rb: bit buffer whose two least significant bits hold BTYPE.
  // Returns 1 when BTYPE == 2; otherwise prints why the block is ignored and
  // returns 0.
  int isDynamicHuffman(unsigned char rb)
  {
      switch (rb & 0x03)
      {
      case 0: // stored, no compression
          printf("Ignorando bloco: sem compacta��o!!!\n");
          return 0;
      case 1: // fixed Huffman codes
          printf("Ignorando bloco: compactado com Huffman fixo!!!\n");
          return 0;
      case 3: // reserved value
          printf("Ignorando bloco: BTYPE = reservado!!!\n");
          return 0;
      default: // only BTYPE == 2 remains
          return 1;
      }
  }
  246.  
  247. //L� o cabe�alho do ficheiro gzip: devolve erro (-1) se o formato for inválido, ou devolve 0 se ok
  248. int getHeader(FILE *gzFile, gzipHeader *gzh) //obt�m cabe�alho
  249. {
  250. unsigned char byte;
  251.  
  252. //Identica��o 1 e 2: valores fixos
  253. fread(&byte, 1, 1, gzFile);
  254. (*gzh).ID1 = byte;
  255. if ((*gzh).ID1 != 0x1f) return -1; //erro no cabe�alho
  256.  
  257. fread(&byte, 1, 1, gzFile);
  258. (*gzh).ID2 = byte;
  259. if ((*gzh).ID2 != 0x8b) return -1; //erro no cabe�alho
  260.  
  261. //Método de compress�o (deve ser 8 para denotar o deflate)
  262. fread(&byte, 1, 1, gzFile);
  263. (*gzh).CM = byte;
  264. if ((*gzh).CM != 0x08) return -1; //erro no cabe�alho
  265.  
  266. //Flags
  267. fread(&byte, 1, 1, gzFile);
  268. unsigned char FLG = byte;
  269.  
  270. //MTIME
  271. char lenMTIME = 4;
  272. fread(&byte, 1, 1, gzFile);
  273. (*gzh).MTIME = byte;
  274. for (int i = 1; i <= lenMTIME - 1; i++)
  275. {
  276. fread(&byte, 1, 1, gzFile);
  277. (*gzh).MTIME = (byte << 8) + (*gzh).MTIME;
  278. }
  279.  
  280. //XFL (not processed...)
  281. fread(&byte, 1, 1, gzFile);
  282. (*gzh).XFL = byte;
  283.  
  284. //OS (not processed...)
  285. fread(&byte, 1, 1, gzFile);
  286. (*gzh).OS = byte;
  287.  
  288. //--- Check Flags
  289. (*gzh).FLG_FTEXT = (char)(FLG & 0x01);
  290. (*gzh).FLG_FHCRC = (char)((FLG & 0x02) >> 1);
  291. (*gzh).FLG_FEXTRA = (char)((FLG & 0x04) >> 2);
  292. (*gzh).FLG_FNAME = (char)((FLG & 0x08) >> 3);
  293. (*gzh).FLG_FCOMMENT = (char)((FLG & 0x10) >> 4);
  294.  
  295. //FLG_EXTRA
  296. if ((*gzh).FLG_FEXTRA == 1)
  297. {
  298. //ler 2 bytes XLEN + XLEN bytes de extra field
  299. //1� byte: LSB, 2�: MSB
  300. char lenXLEN = 2;
  301.  
  302. fread(&byte, 1, 1, gzFile);
  303. (*gzh).xlen = byte;
  304. fread(&byte, 1, 1, gzFile);
  305. (*gzh).xlen = (byte << 8) + (*gzh).xlen;
  306.  
  307. (*gzh).extraField = new unsigned char[(*gzh).xlen];
  308.  
  309. //ler extra field (deixado como est�, i.e., n�o processado...)
  310. for (int i = 0; i <= (*gzh).xlen - 1; i++)
  311. {
  312. fread(&byte, 1, 1, gzFile);
  313. (*gzh).extraField[i] = byte;
  314. }
  315. }
  316. else
  317. {
  318. (*gzh).xlen = 0;
  319. (*gzh).extraField = 0;
  320. }
  321.  
  322. //FLG_FNAME: ler nome original
  323. if ((*gzh).FLG_FNAME == 1)
  324. {
  325. (*gzh).fName = new char[1024];
  326. unsigned int i = 0;
  327. do
  328. {
  329. fread(&byte, 1, 1, gzFile);
  330. if (i <= 1023) //guarda no m�ximo 1024 caracteres no array
  331. (*gzh).fName[i] = byte;
  332. i++;
  333. }while(byte != 0);
  334. if (i > 1023)
  335. (*gzh).fName[1023] = 0; //apesar de nome incompleto, garantir que o array termina em 0
  336. }
  337. else
  338. (*gzh).fName = 0;
  339.  
  340. //FLG_FCOMMENT: ler coment�rio
  341. if ((*gzh).FLG_FCOMMENT == 1)
  342. {
  343. (*gzh).fComment = new char[1024];
  344. unsigned int i = 0;
  345. do
  346. {
  347. fread(&byte, 1, 1, gzFile);
  348. if (i <= 1023) //guarda no m�ximo 1024 caracteres no array
  349. (*gzh).fComment[i] = byte;
  350. i++;
  351. }while(byte != 0);
  352. if (i > 1023)
  353. (*gzh).fComment[1023] = 0; //apesar de coment�rio incompleto, garantir que o array termina em 0
  354. }
  355. else
  356. (*gzh).fComment = 0;
  357.  
  358.  
  359. //FLG_FHCRC (not processed...)
  360. if ((*gzh).FLG_FHCRC == 1)
  361. {
  362. (*gzh).HCRC = new unsigned char[2];
  363. fread(&byte, 1, 1, gzFile);
  364. (*gzh).HCRC[0] = byte;
  365. fread(&byte, 1, 1, gzFile);
  366. (*gzh).HCRC[1] = byte;
  367. }
  368. else
  369. (*gzh).HCRC = 0;
  370.  
  371. return 0;
  372. }
  373.  
  // Returns the ISIZE field of a gzip file: the last 4 bytes of the file,
  // least significant byte first (only the true original size when that size
  // fits in 32 bits).
  // The current file position is saved and restored, so the call does not
  // disturb sequential reading.
  // Returns -1 if the position cannot be saved or the trailer cannot be read.
  long getOrigFileSize(FILE * gzFile)
  {
      // remember where the caller was
      const long savedPos = ftell(gzFile);
      if (savedPos < 0)
          return -1L;

      unsigned long sz = 0;
      unsigned char trailer[4];
      int ok = 0;

      // the last 4 bytes of the file hold ISIZE
      if (fseek(gzFile, -4L, SEEK_END) == 0 &&
          fread(trailer, 1, sizeof trailer, gzFile) == sizeof trailer)
      {
          for (int i = 0; i < 4; i++)
          {
              // Widen before shifting: shifting a promoted int by 24 overflows
              // for bytes >= 0x80 and sign-extends into a 64-bit result.
              sz |= (unsigned long)trailer[i] << (8 * i);
          }
          ok = 1;
      }

      // restore the file position whether or not the read succeeded
      fseek(gzFile, savedPos, SEEK_SET);

      return ok ? (long)sz : -1L;
  }
  // Writes the 8 bits of `byte` into strBits as '0'/'1' characters, most
  // significant bit first, followed by a terminating zero.
  //   strBits: destination buffer with room for at least 9 chars.
  //   byte:    value to convert.
  void bits2String(char *strBits, unsigned char byte)
  {
      strBits[8] = 0;
      // The index is an int: with a plain char counter, `i >= 0` never becomes
      // false on platforms where char is unsigned.
      for (int i = 7; i >= 0; i--)
      {
          // the lowest bit becomes the right-most character not yet written
          strBits[i] = (char)('0' + (byte & 0x01));
          byte = (unsigned char)(byte >> 1);
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement