- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <time.h>
- #define NUM_CHARS 256
- struct treenode {
- int freq;
- unsigned char ch;
- treenode *left, *right;
- };
- typedef struct _pq {
- int heap_size;
- treenode *A[NUM_CHARS];
- } PQ;
- void create_pq (PQ *p) {
- p->heap_size = 0;
- }
- int parent (int i) {
- return (i-1) / 2;
- }
- int left (int i) {
- return i * 2 + 1;
- }
- int right (int i) {
- return i * 2 + 2;
- }
- void heapify (PQ *p, int i) {
- int l, r, min;
- treenode *t;
- l = left (i);
- r = right (i);
- if (l < p->heap_size && p->A[l]->freq < p->A[i]->freq)
- min = l;
- else
- min = i;
- if (r < p->heap_size && p->A[r]->freq < p->A[min]->freq)
- min = r;
- if (min != i) {
- t = p->A[i];
- p->A[i] = p->A[min];
- p->A[min] = t;
- heapify (p, min);
- }
- }
- void insert_pq (PQ *p, treenode *r) {
- int i;
- p->heap_size++;
- i = p->heap_size - 1;
- while ((i > 0) && (p->A[parent(i)]->freq > r->freq)) {
- p->A[i] = p->A[parent(i)];
- i = parent (i);
- }
- p->A[i] = r;
- }
- treenode *extract_min_pq (PQ *p) {
- treenode *r;
- if (p->heap_size == 0) {
- printf ("heap underflow!\n");
- exit (1);
- }
- r = p->A[0];
- p->A[0] = p->A[p->heap_size-1];
- p->heap_size--;
- heapify (p, 0);
- return r;
- }
- unsigned int get_frequencies (FILE *f, unsigned int v[]) {
- int r, n;
- for (n=0;;n++) {
- r = fgetc (f);
- if (feof (f)) break;
- v[r]++;
- }
- return n;
- }
- treenode *build_huffman (unsigned int freqs[]) {
- int i, n;
- treenode *x, *y, *z;
- PQ p;
- create_pq (&p);
- for (i=0; i< NUM_CHARS; i++) {
- x = new treenode;
- x->left = NULL;
- x->right = NULL;
- x->freq = freqs[i];
- x->ch = (char) i;
- insert_pq (&p, x);
- }
- n = p.heap_size-1;
- for (i=0; i< n; i++) {
- z = new treenode;
- x = extract_min_pq (&p);
- y = extract_min_pq (&p);
- z->left = x;
- z->right = y;
- z->freq = x->freq + y->freq;
- insert_pq (&p, z);
- }
- return extract_min_pq (&p);
- }
- void traverse (treenode *r, int level, char code_so_far[], char *codes[]) {
- if ((r->left == NULL) && (r->right == NULL)) {
- code_so_far[level] = 0;
- codes[r->ch] = strdup (code_so_far);
- } else {
- code_so_far[level] = '0';
- traverse (r->left, level+1, code_so_far, codes);
- code_so_far[level] = '1';
- traverse (r->right, level+1, code_so_far, codes);
- }
- }
- int nbits, current_byte, nbytes;
- void bitout (FILE *f, char b) {
- current_byte << 1;
- if (b == '1') current_byte |= 1;
- nbits++;
- if (nbits == 8) {
- fputc (current_byte, f);
- nbytes++;
- nbits = 0;
- current_byte = 0;
- }
- }
- void encode_file (FILE *infile, FILE *outfile, char *codes[]) {
- unsigned char ch;
- char *s;
- current_byte = 0;
- nbits = 0;
- nbytes = 0;
- for (;;) {
- ch = fgetc (infile);
- if (feof (infile)) break;
- for (s=codes[ch]; *s; s++) bitout (outfile, *s);
- }
- while (nbits) bitout (outfile, '0');
- }
- int main (int argc, char *argv[]) {
- FILE *f, *g;
- treenode *r;
- unsigned int n, freqs[NUM_CHARS];
- char *codes[NUM_CHARS], code[NUM_CHARS], fname[100];
- if (argc != 2) {
- fprintf (stderr, "%s <fisier>\n", argv[0]);
- exit (1);
- }
- memset (freqs, 0, sizeof (freqs));
- f = fopen (argv[1], "r");
- if (!f) {
- perror (argv[1]);
- exit (1);
- }
- n = get_frequencies (f, freqs);
- fclose (f);
- r = build_huffman (freqs);
- traverse (r, 0, code, codes);
- sprintf (fname, "%s.huf", argv[1]);
- g = fopen (fname, "w");
- if (!g) {
- perror (fname);
- exit (1);
- }
- fwrite (freqs, NUM_CHARS, sizeof (int), g);
- fwrite (&n, 1, sizeof (int), g);
- f = fopen (argv[1], "r");
- if (!f) {
- perror (argv[1]);
- exit (1);
- }
- encode_file (f, g, codes);
- fclose (f);
- fclose (g);
- printf ("%s is %0.2f%% of %s\n",
- fname, (float) nbytes / (float) n, argv[1]);
- exit (0);
- }