Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>

#include <iostream>

#include "gzip.h"
#include "huffman.h"
- FILE* gzFile;
- long getOrigFileSize(FILE * gzFile);
- int isDynamicHuffman(unsigned char rb);
- void bits2String(char *strBits, unsigned char byte);
- int getHuffmanCodes(int necessaryBits, int exp);
- void lengthLengthAlfa();
- void lengthCodes(int lengths []);
- unsigned char byte; //vari�vel tempor�ria para armazenar um byte lido directamente do ficheiro
- unsigned int rb = 0; //�ltimo byte lido (poder� ter mais que 8 bits, se tiverem sobrado alguns de leituras anteriores)
- char availBits = 0;
- int hLit, hDist, hcLen;
- HuffmanTree* tree=NULL;
- int main() {
- char oneChar;
- long fileSize;
- long origFileSize;
- int numBlocks = 0;
- char needBits = 0;
- gzipHeader gzh;
- gzFile = fopen("C:\\Users\\Alvineca\\CLionProjects\\ex2\\FAQ.txt.gz", "rb");
- if (!gzFile) {
- perror("C:\\Users\\Alvineca\\CLionProjects\\ex2\\FAQ.txt.gz"); exit(EXIT_FAILURE);
- }
- fseek(gzFile, 0L, SEEK_END);
- fileSize = ftell(gzFile);
- fseek(gzFile, 0L, SEEK_SET);
- //ler tamanho do ficheiro original (acrescentar: e definir Vector com s�mbolos
- origFileSize = getOrigFileSize(gzFile);
- //--- ler cabe�alho
- int erro = getHeader(gzFile, &gzh);
- if (erro != 0)
- {
- printf ("Formato inválido!!!");
- return -1;
- }
- //--- Para todos os blocos encontrados
- char BFINAL;
- do
- {
- //--- ler o block header: primeiro byte depois do cabe�alho do ficheiro
- needBits = 3;
- if (availBits < needBits)
- {
- fread(&byte, 1, 1, gzFile);
- rb = (byte << availBits) | rb;
- availBits += 8;
- }
- //obter BFINAL
- //ver se � o �ltimo bloco
- BFINAL = rb & 0x01; //primeiro bit � o menos significativo
- rb = rb >> 1; //descartar o bit correspondente ao BFINAL
- availBits -=1;
- //analisar block header e ver se � huffman din�mico
- if (!isDynamicHuffman(rb)) //ignorar bloco se n�o for Huffman din�mico
- continue;
- rb = rb >> 2; //descartar os 2 bits correspondentes ao BTYPE
- availBits -= 2;
- //--- Se chegou aqui --> compactado com Huffman din�mico --> descompactar
- hLit = getHuffmanCodes(5,0x1F);
- hDist = getHuffmanCodes(5,0x1F);
- hcLen = getHuffmanCodes(4,0x0F);
- printf("\nValores dos codigos:\n____________________________________\n| HLIT: %d | HDIST: %d | HCLEN: %d |\n|__________|___________|___________|\n\n\n", hLit, hDist, hcLen);
- lengthLengthAlfa();
- //actualizar n�mero de blocos analisados
- numBlocks++;
- }while(BFINAL == 0);
- //terminações
- fclose(gzFile);
- printf("End: %d bloco(s) analisado(s).\n", numBlocks);
- //teste da função bits2String: RETIRAR antes de criar o executável final
- char str[9];
- bits2String(str, 0x03);
- printf("%s\n", str);
- //RETIRAR antes de criar o executável final-
- system("PAUSE");
- return EXIT_SUCCESS;
- }
- int getHuffmanCodes(int necessaryBits, int exp){
- int code = 0;
- while (availBits < necessaryBits){
- fread(&byte, 1, 1, gzFile);
- rb = byte << availBits | rb;
- availBits += 8;
- }
- code = rb & exp; //retirar últimos len(exp) bits
- rb = rb >> necessaryBits;
- availBits -= necessaryBits;
- return code;
- }
- void lengthLengthAlfa(){
- int sequence [] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
- int length = 0;
- int codeLength []={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- hcLen+=4;
- printf("Comprimento dos codigos do 'alfabeto de comprimentos de codigos':\n\n");
- for(int i = 0 ;i < hcLen ; i++){
- length = getHuffmanCodes(3,0x07); //0x07- 3 bits menos significativos
- codeLength[sequence[i]] = length;
- printf("index: %d -> comprimento: %d\n",sequence[i], codeLength[sequence[i]]);
- }
- lengthCodes(codeLength);
- }
- void lengthCodes(int lengths []){
- int codes []={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- int maxBits=0;
- for(int i=0; i<hcLen; i++){
- if(maxBits<lengths[i]){
- maxBits=lengths[i];
- }
- }
- printf("MaxBits= %d\n", maxBits);
- char bl_count [maxBits+1];
- for(int i=1; i<=maxBits; i++) {
- int count=0;
- for (int j = 0; j < 19; j++) {
- if(lengths[j]==i){
- count++;
- }
- }
- bl_count[i]=count;
- printf("bl_count[%d]=%d\n", i, bl_count[i]);
- }
- int code=0;
- bl_count[0]=0; // bl_count[maxBits+1]
- int firstCode[maxBits];
- for(int bits=1; bits<=maxBits; bits++){
- code = (code + bl_count[bits-1]) << 1;
- firstCode[bits] = code; // primeiro codigo de cada comprimento
- printf("firstCode[%d] = %d\n", bits, firstCode[bits]);
- }
- for(int i=1; i<=maxBits; i++) {
- int n = firstCode[i];
- for (int j = 0; j < 19; j++) {
- if(lengths[j]==i){
- codes[j] = n;
- n++;
- }
- }
- }
- for(int i=0; i<19; i++){
- printf("codes[%d]= %d\n", i, codes[i]);
- }
- char binario [19];
- tree=createHFTree();
- printf("VER ISTO--------------------------------------");
- for (int i=0; i<19; i++){
- if(lengths[i]!=0){
- char s [lengths[i]];
- int temp=codes[i];
- int bin;
- printf("\n\ncodigo %d: ", codes[i]);
- for(int j=0; j<lengths[i];j++) {
- bin = temp % 2;
- temp = temp / 2;
- if (bin == 1) {
- s[lengths[i] - j] = '1';
- printf("1");
- }
- else {
- s[lengths[i] - j] = '0';
- printf("0");
- }
- }
- printf("\n\n");
- }
- }
- }
/*
 * Tells whether a block header announces dynamic Huffman compression.
 *
 * rb: byte whose two least significant bits hold BTYPE.
 *
 * Returns 1 when BTYPE is 2. For every other value a message explaining why
 * the block is ignored is printed and 0 is returned.
 */
int isDynamicHuffman(unsigned char rb)
{
    switch (rb & 0x03) {
    case 2:
        return 1;
    case 0: // no compression
        printf("Ignorando bloco: sem compacta��o!!!\n");
        break;
    case 1: // fixed Huffman codes
        printf("Ignorando bloco: compactado com Huffman fixo!!!\n");
        break;
    default: // 3: reserved value
        printf("Ignorando bloco: BTYPE = reservado!!!\n");
        break;
    }
    return 0;
}
- //L� o cabe�alho do ficheiro gzip: devolve erro (-1) se o formato for inválido, ou devolve 0 se ok
- int getHeader(FILE *gzFile, gzipHeader *gzh) //obt�m cabe�alho
- {
- unsigned char byte;
- //Identica��o 1 e 2: valores fixos
- fread(&byte, 1, 1, gzFile);
- (*gzh).ID1 = byte;
- if ((*gzh).ID1 != 0x1f) return -1; //erro no cabe�alho
- fread(&byte, 1, 1, gzFile);
- (*gzh).ID2 = byte;
- if ((*gzh).ID2 != 0x8b) return -1; //erro no cabe�alho
- //Método de compress�o (deve ser 8 para denotar o deflate)
- fread(&byte, 1, 1, gzFile);
- (*gzh).CM = byte;
- if ((*gzh).CM != 0x08) return -1; //erro no cabe�alho
- //Flags
- fread(&byte, 1, 1, gzFile);
- unsigned char FLG = byte;
- //MTIME
- char lenMTIME = 4;
- fread(&byte, 1, 1, gzFile);
- (*gzh).MTIME = byte;
- for (int i = 1; i <= lenMTIME - 1; i++)
- {
- fread(&byte, 1, 1, gzFile);
- (*gzh).MTIME = (byte << 8) + (*gzh).MTIME;
- }
- //XFL (not processed...)
- fread(&byte, 1, 1, gzFile);
- (*gzh).XFL = byte;
- //OS (not processed...)
- fread(&byte, 1, 1, gzFile);
- (*gzh).OS = byte;
- //--- Check Flags
- (*gzh).FLG_FTEXT = (char)(FLG & 0x01);
- (*gzh).FLG_FHCRC = (char)((FLG & 0x02) >> 1);
- (*gzh).FLG_FEXTRA = (char)((FLG & 0x04) >> 2);
- (*gzh).FLG_FNAME = (char)((FLG & 0x08) >> 3);
- (*gzh).FLG_FCOMMENT = (char)((FLG & 0x10) >> 4);
- //FLG_EXTRA
- if ((*gzh).FLG_FEXTRA == 1)
- {
- //ler 2 bytes XLEN + XLEN bytes de extra field
- //1� byte: LSB, 2�: MSB
- char lenXLEN = 2;
- fread(&byte, 1, 1, gzFile);
- (*gzh).xlen = byte;
- fread(&byte, 1, 1, gzFile);
- (*gzh).xlen = (byte << 8) + (*gzh).xlen;
- (*gzh).extraField = new unsigned char[(*gzh).xlen];
- //ler extra field (deixado como est�, i.e., n�o processado...)
- for (int i = 0; i <= (*gzh).xlen - 1; i++)
- {
- fread(&byte, 1, 1, gzFile);
- (*gzh).extraField[i] = byte;
- }
- }
- else
- {
- (*gzh).xlen = 0;
- (*gzh).extraField = 0;
- }
- //FLG_FNAME: ler nome original
- if ((*gzh).FLG_FNAME == 1)
- {
- (*gzh).fName = new char[1024];
- unsigned int i = 0;
- do
- {
- fread(&byte, 1, 1, gzFile);
- if (i <= 1023) //guarda no m�ximo 1024 caracteres no array
- (*gzh).fName[i] = byte;
- i++;
- }while(byte != 0);
- if (i > 1023)
- (*gzh).fName[1023] = 0; //apesar de nome incompleto, garantir que o array termina em 0
- }
- else
- (*gzh).fName = 0;
- //FLG_FCOMMENT: ler coment�rio
- if ((*gzh).FLG_FCOMMENT == 1)
- {
- (*gzh).fComment = new char[1024];
- unsigned int i = 0;
- do
- {
- fread(&byte, 1, 1, gzFile);
- if (i <= 1023) //guarda no m�ximo 1024 caracteres no array
- (*gzh).fComment[i] = byte;
- i++;
- }while(byte != 0);
- if (i > 1023)
- (*gzh).fComment[1023] = 0; //apesar de coment�rio incompleto, garantir que o array termina em 0
- }
- else
- (*gzh).fComment = 0;
- //FLG_FHCRC (not processed...)
- if ((*gzh).FLG_FHCRC == 1)
- {
- (*gzh).HCRC = new unsigned char[2];
- fread(&byte, 1, 1, gzFile);
- (*gzh).HCRC[0] = byte;
- fread(&byte, 1, 1, gzFile);
- (*gzh).HCRC[1] = byte;
- }
- else
- (*gzh).HCRC = 0;
- return 0;
- }
/*
 * Returns the original (uncompressed) size stored in the last 4 bytes of the
 * file (ISIZE, least significant byte first).
 *
 * The value is only correct when the original size fits in 32 bits. The
 * current file position is saved on entry and restored before returning.
 *
 * Returns -1 when the trailer cannot be read, e.g. when the file is shorter
 * than 4 bytes.
 */
long getOrigFileSize(FILE * gzFile)
{
    // Save the current position of the file.
    const long savedPos = ftell(gzFile);

    unsigned char trailer[4];
    unsigned long size = 0;
    int ok = 0;

    if (fseek(gzFile, -4L, SEEK_END) == 0 &&
        fread(trailer, 1, sizeof trailer, gzFile) == sizeof trailer)
    {
        // Widen each byte before shifting: shifting an int by 24 would
        // overflow (and sign-extend) when the top byte is 0x80 or greater.
        for (int i = 0; i < 4; i++)
            size |= (unsigned long)trailer[i] << (8 * i);
        ok = 1;
    }

    // Restore the file pointer.
    if (savedPos >= 0)
        fseek(gzFile, savedPos, SEEK_SET);

    return ok ? (long)size : -1L;
}
/*
 * Writes the 8 bits of `byte` as text, most significant bit first.
 *
 * strBits: destination buffer with room for at least 9 chars; it receives
 *          eight '0'/'1' characters followed by a terminating zero.
 * byte:    value to convert.
 */
void bits2String(char *strBits, unsigned char byte)
{
    strBits[8] = '\0';

    // The counter is an int: a plain char may be unsigned, in which case
    // `i >= 0` would never become false.
    for (int i = 7; i >= 0; i--)
    {
        // Take the least significant bit and turn it into the character
        // '0' or '1'.
        strBits[i] = (char)('0' + (byte & 0x01));
        byte = byte >> 1;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement