Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- //对位进行相关操作
- //这里的unsigned char对于位是0或1,对于偏移是0到7
- #pragma region Bit
- //设byte为10101100(对应offset为76543210),即172,读取offset为4和5的位
- //
- //offset=4
- //00010000 1<<offset
- //10101100 byte
- //00000000 byte&(1<<offset)
- //00000000 (byte&(1<<offset))>>offset
- //
- //offset=5
- //00100000 1<<offset
- //10101100 byte
- //00100000 byte&(1<<offset)
- //00000001 (byte&(1<<offset))>>offset
- unsigned char ReadBit(unsigned char byte, unsigned char offset) {
- return (byte & (1 << offset)) >> offset;
- }
- //设byte为10101100(对应offset为76543210),即172,将offset为4的位write为0和1
- //
- //offset=4 write=0
- //00000000 write<<offset
- //10101100 byte
- //10101100 byte|(write<<offset)
- //
- //offset=4 write=1
- //00010000 write<<offset
- //10101100 byte
- //10111100 byte|(write<<offset)
- void WriteBit(unsigned char *byte, unsigned char offset, unsigned char write) {
- *byte |= write << offset;
- }
- #pragma endregion
- #pragma region Huffman
- //动态分配数组存储H树
- typedef struct {
- unsigned int weight; //字符的权
- unsigned int parent; //双亲的地址
- unsigned int lchild; //左子树的地址
- unsigned int rchild; //右子树的地址
- } HTNode, *HuffmanTree;
- //动态分配数组存储H树编码表
- typedef char **HuffmanCode;
- //在HT[1..range]的范围中找到parent=0,权最小的两项,下标分别为s1和s2,且s1<s2
- void Select(HuffmanTree HT, unsigned int range, unsigned int *s1, unsigned int *s2) {
- unsigned int v1 = 4294967295, v2 = 4294967295; //unsigned int的最大值
- *s1 = 0;
- *s2 = 0;
- for (unsigned int i = 1; i <= range; i++) {
- if (HT[i].parent) {
- continue;
- }
- if (HT[i].weight < v1) {
- v2 = v1;
- v1 = HT[i].weight;
- *s2 = *s1;
- *s1 = i;
- } else if (HT[i].weight < v2) {
- v2 = HT[i].weight;
- *s2 = i;
- }
- }
- //保证s1<s2
- v1 = *s1;
- v2 = *s2;
- *s1 = (v1 < v2) ? v1 : v2;
- *s2 = (v1 > v2) ? v1 : v2;
- }
- //构造H树
- void InitHuffmanTree(HuffmanTree *HT, unsigned int *w, unsigned int n) {
- unsigned int m = 2 * n - 1; //H树一共有2n-1个节点
- *HT = malloc((m + 1) * sizeof(HTNode)); //留一个0号结点表示指向空树,相当于二叉树中的NULL
- //为每个字符构建只有根结点,没有左右子树的二叉树
- (*HT)[0].weight = 0;
- (*HT)[0].parent = 0;
- (*HT)[0].lchild = 0;
- (*HT)[0].rchild = 0;
- HTNode *p = *HT + 1; //从1号结点开始
- unsigned int i = 1;
- while (i <= n) {
- (*p).weight = *w;
- (*p).parent = 0;
- (*p).lchild = 0;
- (*p).rchild = 0;
- i++;
- p++;
- w++;
- }
- //剩下的树是空树
- while (i <= m) {
- (*p).weight = 0;
- (*p).parent = 0;
- (*p).lchild = 0;
- (*p).rchild = 0;
- i++;
- p++;
- }
- //开始生成H树
- unsigned int s1, s2;
- for (i = n + 1; i <= m; i++) {
- Select(*HT, i - 1, &s1, &s2);
- (*HT)[s1].parent = i;
- (*HT)[s2].parent = i;
- (*HT)[i].lchild = s1;
- (*HT)[i].rchild = s2;
- (*HT)[i].weight = (*HT)[s1].weight + (*HT)[s2].weight;
- }
- }
- //根据H树求每个字符的编码
- void InitHuffmanCode(HuffmanCode *HC, HuffmanTree HT, unsigned int n) {
- *HC = malloc((n + 1) * sizeof(char*)); //HC相当于一个由字符数组组成的数组,里面的元素类型是char*
- char *cd = malloc(n * sizeof(char)); //求编码的工作区,每个字符的编码长度一定不会超过n
- cd[n - 1] = '\0'; //字符数组以\0结尾
- unsigned int start; //表示一个编码从cd的哪一项开始
- unsigned int c, f; //c指当前指向的树,f是c的双亲
- //对每个字符逆向求编码
- for (unsigned int i = 1; i <= n; i++) {
- start = n - 1;
- //求编码从叶子结点开始,到c指向根结点为止,此时f为0
- for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent) {
- start--; //向前移一位
- if (HT[f].lchild == c) {
- cd[start] = '0'; //左子树的路径用0表示
- }
- else {
- cd[start] = '1'; //右子树的路径用1表示
- }
- }
- //从cd复制到HC
- (*HC)[i] = malloc((n - start) * sizeof(char));
- strcpy_s((*HC)[i], n - start + 1, &cd[start]);
- }
- free(cd);
- }
- //HT存放构造的H树
- //HC存放构造的编码
- //w指向存放了每个字符的权的数组
- //n是字符数量
- //把建树和编码的部分分开了
- void HuffmanCoding(HuffmanTree *HT, HuffmanCode *HC, unsigned int *w, unsigned int n) {
- //只有一个或者没有字符还有什么编码的必要吗?
- if (n <= 1) {
- return;
- }
- InitHuffmanTree(HT, w, n);
- InitHuffmanCode(HC, *HT, n);
- }
- #pragma endregion
- //以表的形式输出对n个字符进行编码的HT
- void PrintHuffmanTree(HuffmanTree HT, unsigned int n) {
- for (unsigned int i = 0; i <= 2 * n - 1; i++) {
- printf("%3d w:%3d p:%3d lc:%3d rc:%3d\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
- }
- }
- int main() {
- unsigned int Select = 1;
- HuffmanTree HT = NULL;
- HuffmanCode HC = NULL;
- unsigned int CharSetSize; //字符集大小
- char *CharSet = NULL; //字符
- unsigned int *CharWeight = NULL; //每个字符对应的权
- FILE *fpHuff = NULL, *fpText = NULL, *fpCode = NULL;
- unsigned long long FileSize, CodeSize; //进行编码时源文件的大小和编码的位数
- unsigned int Status; //0表示正常,1表示出错
- unsigned int Index; //指向的编码表索引
- unsigned char Offset, Byte, Read; //读写二进制位用
- unsigned char *pCode = NULL; //读取编码表用
- while (Select) {
- puts("\n-- Huffman树编码演示 --");
- puts("1 - 建立并保存Huffman树到Huffman.bin");
- puts("2 - 从TextFile.txt读取文件,以Huffman.bin的数据作为Huffman树进行编码");
- puts("3 - 从CodeFile.bin读取文件,以Huffman.bin的数据作为Huffman树进行解码");
- puts("0 - 退出");
- puts("输入选项:");
- scanf_s("%d", &Select);
- switch (Select) {
- case 1:
- puts("输入字符集大小:");
- scanf_s("%d", &CharSetSize);
- CharSet = malloc(CharSetSize * sizeof(char));
- CharWeight = malloc(CharSetSize * sizeof(unsigned int));
- for (unsigned int i = 0; i < CharSetSize; i++) {
- printf("输入第%d个字符:\n", i + 1);
- scanf_s(" %c", &CharSet[i], 1);
- printf("输入第%d个字符的权:\n", i + 1);
- scanf_s("%d", &CharWeight[i]);
- }
- HuffmanCoding(&HT, &HC, CharWeight, CharSetSize);
- puts("\n生成的Huffman树:");
- PrintHuffmanTree(HT, CharSetSize);
- puts("\n字符对应的编码:");
- for (unsigned int i = 0; i < CharSetSize; i++) {
- printf("%c %s\n", CharSet[i], HC[i + 1]);
- }
- //保存H树
- fopen_s(&fpHuff, "Huffman.bin", "wb");
- fwrite(&CharSetSize, sizeof(unsigned int), 1, fpHuff);
- fwrite(CharSet, sizeof(char), CharSetSize, fpHuff);
- fwrite(CharWeight, sizeof(unsigned int), CharSetSize, fpHuff);
- fwrite(HT, sizeof(HTNode), 2 * CharSetSize, fpHuff);
- fclose(fpHuff);
- puts("\n已保存Huffman树。");
- break;
- case 2:
- //读取H树
- if (CharSet) {
- free(CharSet);
- }
- if (CharWeight) {
- free(CharWeight);
- }
- if (HT) {
- free(HT);
- }
- if (HC) {
- free(HC);
- }
- fopen_s(&fpHuff, "Huffman.bin", "rb");
- fread(&CharSetSize, sizeof(unsigned int), 1, fpHuff);
- CharSet = malloc(CharSetSize * sizeof(char));
- CharWeight = malloc(CharSetSize * sizeof(unsigned int));
- HT = malloc(2 * CharSetSize * sizeof(HTNode));
- fread(CharSet, sizeof(char), CharSetSize, fpHuff);
- fread(CharWeight, sizeof(unsigned int), CharSetSize, fpHuff);
- fread(HT, sizeof(HTNode), 2 * CharSetSize, fpHuff);
- fclose(fpHuff);
- //根据H树建立编码表
- InitHuffmanCode(&HC, HT, CharSetSize);
- //读取TextFile
- fopen_s(&fpCode, "CodeFile.bin", "wb");
- fopen_s(&fpText, "TextFile.txt", "rb");
- fseek(fpText, 0L, SEEK_END);
- FileSize = ftell(fpText);
- fseek(fpText, 0L, SEEK_SET);
- CodeSize = 0;
- Offset = 7;
- Byte = 0;
- //对TextFile中的每个字符进行编码
- for (unsigned long long i = 0; i < FileSize; i++) {
- Status = 1;
- fread(&Read, sizeof(char), 1, fpText);
- //从编码表中查找对应的编码
- for (unsigned int j = 0; j < CharSetSize; j++) {
- if (CharSet[j] == Read) {
- pCode = HC[j + 1];
- while (*pCode) {
- CodeSize++;
- WriteBit(&Byte, Offset, *pCode - '0');
- if (Offset) {
- //字节没有写满,offset-1
- Offset--;
- } else {
- //字节写满了就写入文件,然后清空字节
- Offset = 7;
- fwrite(&Byte, sizeof(unsigned char), 1, fpCode);
- Byte = 0;
- }
- pCode++;
- }
- Status = 0;
- break;
- }
- }
- if (Status) {
- printf("提示:已忽略没有在字符集中出现的字符%c\n", Read);
- }
- }
- //最后一个字节没有写满也要写入,没有写满的位都是0
- if (Offset != 7) {
- fwrite(&Byte, sizeof(unsigned char), 1, fpCode);
- }
- //在文件最后写入编码位数
- fwrite(&CodeSize, sizeof(unsigned long long), 1, fpCode);
- fclose(fpCode);
- fclose(fpText);
- puts("\n编码完成。");
- break;
- case 3:
- //读取H树
- if (CharSet) {
- free(CharSet);
- }
- if (CharWeight) {
- free(CharWeight);
- }
- if (HT) {
- free(HT);
- }
- if (HC) {
- free(HC);
- }
- fopen_s(&fpHuff, "Huffman.bin", "rb");
- fread(&CharSetSize, sizeof(unsigned int), 1, fpHuff);
- CharSet = malloc(CharSetSize * sizeof(char));
- CharWeight = malloc(CharSetSize * sizeof(unsigned int));
- HT = malloc(2 * CharSetSize * sizeof(HTNode));
- fread(CharSet, sizeof(char), CharSetSize, fpHuff);
- fread(CharWeight, sizeof(unsigned int), CharSetSize, fpHuff);
- fread(HT, sizeof(HTNode), 2 * CharSetSize, fpHuff);
- fclose(fpHuff);
- //根据H树建立编码表
- InitHuffmanCode(&HC, HT, CharSetSize);
- //读取CodeFile
- Index = 2 * CharSetSize - 1;
- fopen_s(&fpCode, "CodeFile.bin", "rb");
- fopen_s(&fpText, "TextFile.txt", "wb");
- //读取编码位数
- fseek(fpCode, -1L * (long)sizeof(unsigned long long), SEEK_END);
- fread(&CodeSize, sizeof(unsigned long long), 1, fpCode);
- fseek(fpCode, 0L, SEEK_SET);
- Offset = 7;
- fread(&Byte, sizeof(unsigned char), 1, fpCode); //先读取一个字节
- //对CodeFile中的每个位进行读取
- for (unsigned long long i = 0; i < CodeSize; i++) {
- Read = ReadBit(Byte, Offset);
- if (Read) {
- //读到1就移向右子树
- Index = HT[Index].rchild;
- } else {
- //读到0就移向右子树
- Index = HT[Index].lchild;
- }
- //移到叶子结点就写入字符,同时移回根结点
- if (!HT[Index].lchild && !HT[Index].rchild) {
- fwrite(&CharSet[Index - 1], sizeof(char), 1, fpText);
- Index = 2 * CharSetSize - 1;
- }
- if (Offset) {
- //字节没有读完,offset-1
- Offset--;
- } else {
- //字节读完了就读下一个字节
- Offset = 7;
- fread(&Byte, sizeof(unsigned char), 1, fpCode);
- }
- }
- fclose(fpCode);
- fclose(fpText);
- puts("\n解码完成。");
- break;
- }
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement