Untitled

/*Author: Rui Pedro Paiva
Teoria da Informação, LEI, 2007/2008*/

#include <cstdlib>
#include "gzip.h"


//função principal, a qual gere todo o processo de descompactação
int main()
{
    //--- Gzip file management variables
    FILE *gzFile;  //ponteiro para o ficheiro a abrir
    long fileSize;
    long origFileSize;
    int numBlocks = 0;
    gzipHeader gzh;
    unsigned char byte;  //variável temporária para armazenar um byte lido directamente do ficheiro
    unsigned int rb = 0;  //último byte lido (poderá ter mais que 8 bits, se tiverem sobrado alguns de leituras anteriores)
    char needBits = 0, availBits = 0;
    char *vetor_ficheiro;
    HuffmanTree *hft, *hft_hlit, *hft_hdist;


    //--- obter ficheiro a descompactar
    char fileName[] = "FAQ.txt.gz";


    //--- processar ficheiro
    gzFile = fopen(fileName, "rb");
    fseek(gzFile, 0L, SEEK_END);
    fileSize = ftell(gzFile);
    fseek(gzFile, 0L, SEEK_SET);

    //ler tamanho do ficheiro original (acrescentar: e definir Vector com símbolos
    origFileSize = getOrigFileSize(gzFile);


    //--- ler cabeçalho
    int erro = getHeader(gzFile, &gzh);
    if (erro != 0)
    {
        printf ("Formato inválido!!!");
        return -1;
    }

    //--- Para todos os blocos encontrados
    char BFINAL;

    do
    {
        //--- ler o block header: primeiro byte depois do cabeçalho do ficheiro
        needBits = 3;
        if (availBits < needBits)
        {
            fread(&byte, 1, 1, gzFile);
            rb = (byte << availBits) | rb;
            availBits += 8;
        }

        //obter BFINAL
        //ver se é o último bloco
        BFINAL = rb  & 0x01; //primeiro bit é o menos significativo
        rb = rb >> 1; //descartar o bit correspondente ao BFINAL
        availBits -=1;

        //analisar block header e ver se é huffman dinâmico
        if(!isDynamicHuffman(rb))  //ignorar bloco se não for Huffman dinâmico
            continue;
        rb = rb >> 2; //descartar os 2 bits correspondentes ao BTYPE
        availBits -= 2;

        int HLIT, HDIST, HCLEN;

        HLIT = read_data(5,&availBits,&rb,gzFile);
        printf("HLIT = %d\n", HLIT);

        HDIST = read_data(5,&availBits,&rb,gzFile);
        printf("HDIST = %d\n", HDIST);

        HCLEN = read_data(4,&availBits,&rb,gzFile);
        printf("HCLEN = %d\n", HCLEN);

        unsigned int comps[19] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
        int posi[19] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
        int auxiliar[19] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
        int comp=0;

        for(int i = 0; i<(HCLEN + 4) ; i++){
            auxiliar[i]+=read_data(3,&availBits,&rb,gzFile);
        }
        for(int i = 0; i<19; i++){
            comps[posi[i]]+=auxiliar[i];
        }
        for(int i = 0; i<19; i++){
            printf("%d ", comps[i]);
        }

        hft = compcode_to_huffman(comps,19);

        //Semana 3 - pergunta 4
        unsigned int *length_codes1;
        int read_size =HLIT+257;
        length_codes1= (unsigned int*)malloc(read_size*sizeof(unsigned int));
        length_codes1 = get_length_codes(hft, &rb, &availBits, read_size, gzFile);

        //Semana 4 - pergunta 5
        printf("\n\n");
        unsigned int *length_codes2;
        read_size= HDIST+1;
        length_codes2= (unsigned int*)malloc(read_size*sizeof(unsigned int));
        length_codes2 = get_length_codes(hft, &rb, &availBits, read_size, gzFile);

        //Semana 5- pergunta 6

        hft_hlit = compcode_to_huffman(length_codes1,HLIT+257);
        hft_hdist= compcode_to_huffman(length_codes2,HDIST+1);

        int pos_vetor=0, read_bits, copy_length, dist_node, back_dist, pos_aux;
        bool end_block=false;
        //pode haver mais blocos
        int array_bits_hlit[29]= {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
        int array_length_hlit[29]= {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};

        int array_bits_hdist[30]= {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
        int array_distance_hdist[30]= {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};

        unsigned int bit;
        int i_node;
        int n=0;
        bit=0;
        char bits[10];
    	vetor_ficheiro= (char*)malloc(sizeof(char)*origFileSize);
        while(!end_block){
            while(1){
                bit= read_data(1, &availBits, &rb, gzFile);
                i_node= nextNode(hft_hlit, '0'+bit);
                if(i_node>=-1){
                    if(i_node==256){
                        end_block=true;
                        break;
                    }
                    else if(i_node<256){
                    	printf("%d\n", i_node);
                    	printf("%d\n", pos_vetor);
                    	vetor_ficheiro[0]='0';
                    	printf("%s\n", "sou um teste");
                        vetor_ficheiro[pos_vetor]=i_node;

                    	printf("teste2\n");
                        pos_vetor++;

                    	printf("teste2\n");
                    }
                    else{
                    	printf("teste1\n");
                        pos_aux= i_node -257;
                        if(i_node<=264 ||i_node== 285)
                            copy_length= array_length_hlit[pos_aux];
                        else{
                            read_bits= array_bits_hlit[pos_aux];
                            bit= read_data(read_bits, &availBits, &rb, gzFile);

                            copy_length= array_length_hlit[pos_aux]+bit;
                        }

                        do{
                            bit= read_data(1, &availBits, &rb, gzFile);
                            dist_node= nextNode(hft_hdist, '0' + bit);
                        }while(dist_node==-2);

                        read_bits= array_bits_hdist[dist_node];

                        bit=0;
                        if(dist_node>=4)
                            bit= read_data(read_bits, &availBits, &rb, gzFile);

                        back_dist= bit+array_distance_hdist[dist_node];

                        for(int i=0; i<copy_length; i++)
                            vetor_ficheiro[pos_vetor+i]= vetor_ficheiro[pos_vetor-back_dist+i];
                        pos_vetor+=copy_length;
                        printf("n-> %d\n",n);
                        n++;
                    }

                    resetCurNode(hft_hlit);
                    resetCurNode(hft_hdist);
                    break;
                }
            }
        }

        //actualizar número de blocos analisados
        numBlocks++;

    }while(BFINAL == 0);


    //terminações
    FILE *fp;
    fp= fopen("final.txt","w+");

    fprintf(fp,"%s",vetor_ficheiro);
    fclose(fp);


    fclose(gzFile);

    printf("End: %d bloco(s) analisado(s).\n", numBlocks);


    //RETIRAR antes de criar o executável final
    system("PAUSE");
    return EXIT_SUCCESS;
}


//---------------------------------------------------------------
//Lê o cabeçalho do ficheiro gzip: devolve erro (-1) se o formato for inválidodevolve, ou 0 se ok
int getHeader(FILE *gzFile, gzipHeader *gzh) //obtém cabeçalho
{
    unsigned char byte;

    //Identicação 1 e 2: valores fixos
    fread(&byte, 1, 1, gzFile);
    (*gzh).ID1 = byte;
    if ((*gzh).ID1 != 0x1f) return -1; //erro no cabeçalho

    fread(&byte, 1, 1, gzFile);
    (*gzh).ID2 = byte;
    if ((*gzh).ID2 != 0x8b) return -1; //erro no cabeçalho

    //Método de compressão (deve ser 8 para denotar o deflate)
    fread(&byte, 1, 1, gzFile);
    (*gzh).CM = byte;
    if ((*gzh).CM != 0x08) return -1; //erro no cabeçalho

    //Flags
    fread(&byte, 1, 1, gzFile);
    unsigned char FLG = byte;

    //MTIME
    char lenMTIME = 4;
    fread(&byte, 1, 1, gzFile);
    (*gzh).MTIME = byte;
    for (int i = 1; i <= lenMTIME - 1; i++)
    {
        fread(&byte, 1, 1, gzFile);
        (*gzh).MTIME = (byte << 8) + (*gzh).MTIME;
    }

    //XFL (not processed...)
    fread(&byte, 1, 1, gzFile);
    (*gzh).XFL = byte;

    //OS (not processed...)
    fread(&byte, 1, 1, gzFile);
    (*gzh).OS = byte;

    //--- Check Flags
    (*gzh).FLG_FTEXT = (char)(FLG & 0x01);
    (*gzh).FLG_FHCRC = (char)((FLG & 0x02) >> 1);
    (*gzh).FLG_FEXTRA = (char)((FLG & 0x04) >> 2);
    (*gzh).FLG_FNAME = (char)((FLG & 0x08) >> 3);
    (*gzh).FLG_FCOMMENT = (char)((FLG & 0x10) >> 4);

    //FLG_EXTRA
    if ((*gzh).FLG_FEXTRA == 1)
    {
        //ler 2 bytes XLEN + XLEN bytes de extra field
        //1º byte: LSB, 2º: MSB
        char lenXLEN = 2;

        fread(&byte, 1, 1, gzFile);
        (*gzh).xlen = byte;
        fread(&byte, 1, 1, gzFile);
        (*gzh).xlen = (byte << 8) + (*gzh).xlen;

        (*gzh).extraField = new unsigned char[(*gzh).xlen];

        //ler extra field (deixado como está, i.e., não processado...)
        for (int i = 0; i <= (*gzh).xlen - 1; i++)
        {
            fread(&byte, 1, 1, gzFile);
            (*gzh).extraField[i] = byte;
        }
    }
    else
    {
        (*gzh).xlen = 0;
        (*gzh).extraField = 0;
    }

    //FLG_FNAME: ler nome original
    if ((*gzh).FLG_FNAME == 1)
    {
        (*gzh).fName = new char[1024];
        unsigned int i = 0;
        do
        {
            fread(&byte, 1, 1, gzFile);
            if (i <= 1023)  //guarda no máximo 1024 caracteres no array
                (*gzh).fName[i] = byte;
            i++;
        }while(byte != 0);
        if (i > 1023)
            (*gzh).fName[1023] = 0;  //apesar de nome incompleto, garantir que o array termina em 0
    }
    else
        (*gzh).fName = 0;

    //FLG_FCOMMENT: ler comentário
    if ((*gzh).FLG_FCOMMENT == 1)
    {
        (*gzh).fComment = new char[1024];
        unsigned int i = 0;
        do
        {
            fread(&byte, 1, 1, gzFile);
            if (i <= 1023)  //guarda no máximo 1024 caracteres no array
                (*gzh).fComment[i] = byte;
            i++;
        }while(byte != 0);
        if (i > 1023)
            (*gzh).fComment[1023] = 0;  //apesar de comentário incompleto, garantir que o array termina em 0
    }
    else
        (*gzh).fComment = 0;


    //FLG_FHCRC (not processed...)
    if ((*gzh).FLG_FHCRC == 1)
    {
        (*gzh).HCRC = new unsigned char[2];
        fread(&byte, 1, 1, gzFile);
        (*gzh).HCRC[0] = byte;
        fread(&byte, 1, 1, gzFile);
        (*gzh).HCRC[1] = byte;
    }
    else
        (*gzh).HCRC = 0;

    return 0;
}


//---------------------------------------------------------------
//Analisa block header e vê se é huffman dinâmico
int isDynamicHuffman(unsigned char rb)
{
    unsigned char BTYPE = rb & 0x03;

    if (BTYPE == 0) //--> sem compressão
    {
        printf("Ignorando bloco: sem compactação!!!\n");
        return 0;
    }
    else if (BTYPE == 1)
    {
        printf("Ignorando bloco: compactado com Huffman fixo!!!\n");
        return 0;
    }
    else if (BTYPE == 3)
    {
        printf("Ignorando bloco: BTYPE = reservado!!!\n");
        return 0;
    }
    else
        return 1;
}


//---------------------------------------------------------------
//Obtém tamanho do ficheiro original
long getOrigFileSize(FILE * gzFile)
{
    //salvaguarda posição actual do ficheiro
    long fp = ftell(gzFile);

    //últimos 4 bytes = ISIZE;
    fseek(gzFile, -4, SEEK_END);

    //determina ISIZE (só correcto se cabe em 32 bits)
    unsigned long sz = 0;
    unsigned char byte;
    fread(&byte, 1, 1, gzFile);
    sz = byte;
    for (int i = 0; i <= 2; i++)
    {
        fread(&byte, 1, 1, gzFile);
        sz = (byte << 8*(i+1)) + sz;
    }


    //restaura file pointer
    fseek(gzFile, fp, SEEK_SET);

    return sz;
}


//---------------------------------------------------------------
void bits2String(char *strBits, char byte, char len)
{
    char mask = 0x01;  //get LSbit

    strBits[len] = 0;
    for (char bit, i = len-1; i >= 0; i--)
    {
        bit = byte & mask;
        strBits[i] = bit +48; //converter valor numérico para o caracter alfanumérico correspondente
        byte = byte >> 1;
    }
}

int read_data(char needBits, char *availBits, unsigned int *rb, FILE *gzFile){
    int resultado;
    int mask;
    unsigned char byte;

    mask = (1<<needBits)-1;
    while (*availBits < needBits){
        fread(&byte, 1, 1, gzFile);
        *rb = (byte << *availBits) | *rb;
        *availBits += 8;
    }
    resultado = *rb & mask;
    *rb = *rb >> needBits;
    *availBits -= needBits;
    return resultado;
}
//--------------------------------------------------------------
HuffmanTree * compcode_to_huffman(unsigned int *comps, int tam){
    int mini=0, maxi=0;
    for(int i=0; i<tam; i++){
        if(mini == 0 || (comps[i]!=0 && comps[i]<mini))
            mini= comps[i];
        if(comps[i]>maxi)
            maxi= comps[i];
    }
    char * dec_codes;
    dec_codes= (char*)malloc(tam*sizeof(char));
    int entrou;
    int comprimento =0;
    for(int k=mini; k<=maxi; k++){
        entrou=0;
        for(int i=0; i<(tam); i++){
            if(comps[i]==k){
                entrou=1;
                dec_codes[i]=comprimento;
                for(int j=i+1; j<(tam); j++){
                    if(comps[j]==k){
                        comprimento= comprimento+1;
                        dec_codes[j]=comprimento;
                    }
                }
                comprimento= (1+comprimento)*2;
                break;
            }
        }
        if(entrou==0){
            printf("entrou 0 com k= %d\n",k);
            comprimento= comprimento*2;
        }
    }

    for(int i=0; i<tam; i++){
        if(comps[i]==0)
            dec_codes[i]=0;
    }

        char codigos[tam][256];
       // codigos = (char *)malloc(sizeof(char));
        for(int i = 0; i<tam; i++){
            bits2String(codigos[i], dec_codes[i],comps[i]);
            printf("%s ", codigos[i]);
        }

     printf("\n");
        HuffmanTree * hft;
        hft = createHFTree();

        for(int i = 0; i<tam; i++){
            if(strlen(codigos[i])>0){
                addNode(hft, codigos[i], i, 1);
            }
        }
        printf("\n\n");
        /*for(int i = 0; i<tam; i++){
            if(strlen(codigos[i])>0){
                findNode(hft, codigos[i], 1);
            }
        }*/
    return hft;

}


unsigned int * get_length_codes(HuffmanTree *hft, unsigned int *rb, char *availBits, int read_size, FILE *gzfile){
        unsigned int bit;
        int i_node;
        int extra_bits;
        unsigned int * length_codes;
        length_codes= (unsigned int*)malloc(read_size*sizeof(unsigned int));
        int i=0;
        while(i<read_size){
            while(1){
                bit= read_data(1, availBits, rb, gzfile);
                i_node= nextNode(hft, '0' + bit);
                if(i_node>=-1){
                    if(i_node==-1)
                        length_codes[i++]=0;

                    else{
                        if(i_node <16)
                            length_codes[i++]= i_node;
                        else if(i_node==16){
                            bit= read_data(2, availBits, rb, gzfile);
                            extra_bits= bit+3;
                            for(int j=i; j<extra_bits+i;j++)
                                length_codes[j]= length_codes[i-1];
                            i+=extra_bits;
                        }
                        else if(i_node==17){
                            bit= read_data(3, availBits, rb, gzfile);
                            extra_bits= bit+3;
                            for(int j=i; j<extra_bits+i; j++)
                                length_codes[j]=0;
                            i+=extra_bits;

                        }
                        else if(i_node==18){
                            bit= read_data(7, availBits, rb, gzfile);
                            extra_bits= bit+11;
                            for(int j=i; j<extra_bits+i;j++)
                                length_codes[j]= 0;
                            i+=extra_bits;
                        }
                    }
                    resetCurNode(hft);
                    break;
                }
            }
        }
        for(i=0; i<read_size;i++){
            printf("%d ",length_codes[i]);
        }

        return length_codes;
}