SHOW:
|
|
- or go back to the newest paste.
1 | datatype HuffmanTree = | |
2 | Node of (string * int) * HuffmanTree * HuffmanTree | |
3 | | Symbol of char * int; | |
4 | ||
5 | local | |
6 | fun unique [] = [] | |
7 | | unique (x::xs) = x:: unique (List.filter (fn y => y<>x) xs); | |
8 | fun countLetterAux (x:string,a:char,i:int) = if i=size(x) then 0 else (if String.sub(x,i)=a then 1+countLetterAux(x,a,i+1) else countLetterAux(x,a,i+1)); | |
9 | fun countLetter (x:string,a:char) = if size(x)=0 then 0 else countLetterAux(x,a,0); | |
10 | ||
11 | in | |
12 | fun computeHistogram "" = [] | |
13 | | computeHistogram s = map (fn x => (x,countLetter(s,x))) (unique(explode s)) | |
14 | end; | |
15 | ||
16 | local | |
17 | local | |
18 | fun merge _ [] [] = [] | |
19 | | merge _ list [] = list | |
20 | | merge _ [] list = list | |
21 | | merge pred (x::xs) (y::ys) = if pred(x,y) then (y::(merge pred (x::xs) ys)) | |
22 | else (x::(merge pred xs (y::ys))) | |
23 | in | |
24 | fun mergesort _ [] = [] | |
25 | | mergesort _ [x] = [x] | |
26 | | mergesort pred l = merge pred | |
27 | (mergesort pred (List.take (l,((length l) div 2)))) | |
28 | (mergesort pred (List.drop (l,((length l) div 2)))) | |
29 | end; | |
30 | fun compareTuple ((x:string,n1),(y:string,n2)) = if n1=n2 then y<x else n2<n1 | |
31 | fun compareTuple2 ((x:char,n1),(y:char,n2)) = compareTuple((str(x),n1),(str(y),n2)) | |
32 | fun sortAndConvert lst = map (fn (x,n) => Symbol(x,n)) (mergesort compareTuple2 lst) | |
33 | fun buildHuffmanTreeAux (Symbol(x,n1)::Symbol(y,n2)::_) = Node(((str(x)^str(y)),(n1+n2)),Symbol(x,n1),Symbol(y,n2)) | |
34 | | buildHuffmanTreeAux (Node((s,n1),T1,T2)::Symbol(y,n2)::_) = if compareTuple((s,n1),(str(y),n2)) | |
35 | then Node((str(y)^s,n1+n2),Symbol(y,n2),Node((s,n1),T1,T2)) | |
36 | else Node((s^str(y),n1+n2),Node((s,n1),T1,T2),Symbol(y,n2)) | |
37 | fun buildHuffmanList (x::[]) = x | |
38 | | buildHuffmanList lst = buildHuffmanList((buildHuffmanTreeAux (List.take(lst,2)))::(List.drop(lst,2))) | |
39 | in | |
40 | fun buildHuffmanTree lst = buildHuffmanList (sortAndConvert lst) | |
41 | end; | |
42 | ||
43 | fun encodeMessage tree s = | |
44 | let | |
45 | fun LetterEncodingHist (Symbol(x,n),s) = [(x,s)] | |
46 | | LetterEncodingHist (Node(_,T1,T2),s) = (LetterEncodingHist (T1,s^"0"))@(LetterEncodingHist (T2,s^"1")) | |
47 | fun LetterEncoding ([],_) = "" | |
48 | | LetterEncoding ((y,s)::lst,x) = if y=x then s else LetterEncoding (lst,x) | |
49 | val hist = LetterEncodingHist(tree,"") | |
50 | in | |
51 | foldr op^ "" (map (fn c => LetterEncoding(hist,c)) (explode s)) | |
52 | end; | |
53 | ||
54 | local | |
55 | fun decodeMessageAux (Symbol(a,n)) _ [] = str(a) | |
56 | | decodeMessageAux (Symbol(a,n)) T2 lst = str(a)^(decodeMessageAux T2 T2 lst) | |
57 | | decodeMessageAux (Node((x,n),TL,TR)) T2 lst = if (hd lst) = #"0" then decodeMessageAux TL T2 (tl lst) | |
58 | else decodeMessageAux TR T2 (tl lst) | |
59 | in | |
60 | fun decodeMessage tree s = decodeMessageAux tree tree (explode s) | |
61 | end; | |
62 | ||
63 | fun encodeTree tree = | |
64 | let | |
65 | (*convert positive base10 number into binary number*) | |
66 | fun num2binary_aux(0,false) = "0" | |
67 | | num2binary_aux(0,true) = "" | |
68 | - | else num2binary_aux((n div 2),true)^"1" |
68 | + | |
69 | else num2binary_aux((n div 2),true)^"1" | |
70 | - | | num2binary n = num2binary_aux(n,true) |
70 | + | |
71 | | num2binary n = num2binary_aux(n,true) | |
72 | - | fun max_appearance (Symbol(x,n)) = n |
72 | + | (*convert the char into binary number which is the ascii encoding*) |
73 | - | | max_appearance (Node((_,_),Symbol(_,n),_)) = n |
73 | + | |
74 | - | | max_appearance (Node((_,_),_,Symbol(_,n))) = n |
74 | + | (*find the max appearance of a char in huffman tree*) |
75 | fun maxAppearance (Symbol(x,n)) = n | |
76 | | maxAppearance (Node((_,_),Symbol(_,n),_)) = n | |
77 | - | fun complete_zeros(n,lst) = if length(lst) = n then lst else #"0"::complete_zeros(n-1,lst) |
77 | + | | maxAppearance (Node((_,_),_,Symbol(_,n))) = n |
78 | - | val hist = LetterEncodingHist(tree) |
78 | + | (*create histogram of the letter appearances*) |
79 | - | val max_app = max_appearance(tree) |
79 | + | |
80 | - | val max_app_encode = complete_zeros(19,explode(num2binary(max_app))) |
80 | + | |
81 | - | fun encode_tuple (x,y) = complete_zeros(8,explode(char2binary(x))) @ (complete_zeros(max_app,explode(num2binary(y)))) |
81 | + | (*add Zeros in the start of the list untill the length of the list is equal to the given number*) |
82 | - | fun append_to_complete lst = if (length(lst) mod 8) = 0 then lst else append_to_complete(lst@[#"0"]) |
82 | + | fun addZeros(n,lst) = if length(lst) = n then lst else #"0"::addZeros(n-1,lst) |
83 | (*calculate the ceiling of log of the number*) | |
84 | fun log2 0 = 0 | log2 n = 1 + log2(n div 2) | |
85 | val max_app = maxAppearance(tree) | |
86 | (*save the number of bits needed to encode the number of char appearances*) | |
87 | val max_app_encode = addZeros(5,explode(num2binary(log2(max_app)))) | |
88 | (*encode the first var of the Tuple to it binary which represent the ascii of the char and the appearance of the char to binary*) | |
89 | - | encode_list (append_to_complete(max_app_encode @ (foldr op@ [] (map encode_tuple hist)))) [] |
89 | + | fun encodeTuple (x,y) = addZeros(8,explode(char2binary(x))) @ (addZeros(log2(max_app),explode(num2binary(y)))) |
90 | (* add zeros at the end that the list length will be multiply of 8*) | |
91 | fun appendToCompleteMult lst = if (length(lst) mod 8) = 0 then lst else appendToCompleteMult(lst@[#"0"]) | |
92 | fun binary_to_num [] _ = 0 | |
93 | | binary_to_num (x::xs) m = if x = #"1" then (m + (binary_to_num xs (2*m))) else (binary_to_num xs (2*m)) | |
94 | fun encode_list [] _= "" | |
95 | | encode_list (x::xs) temp = if length(temp) = 8 then str(chr(binary_to_num temp 1))^(encode_list xs [x]) | |
96 | else (encode_list xs (temp@[x])) | |
97 | in | |
98 | encode_list (appendToCompleteMult(max_app_encode @ (foldr op@ [] (map encodeTuple (LetterEncodingHist(tree)))))) [] | |
99 | end; | |
100 | ||
101 | fun decodeHuffmanTree s = | |
102 | let | |
103 | fun powerOfTwo 0 = 1 | |
104 | | powerOfTwo n = 2*(powerOfTwo n-1) | |
105 | fun NumToBinary (_,~1,lst) = lst (*convert char to binary code list*) | |
106 | | NumToBinary (0,i,lst) = NumToBinary (0,i-1,lst@[0]) | |
107 | | NumToBinary (n,i,lst) = | |
108 | let | |
109 | val power = powerOfTwo i | |
110 | in | |
111 | if n>=power then NumToBinary(n-power,i-1,lst@[1]) else NumToBinary(n,i-1,lst@[0]) | |
112 | end | |
113 | fun BinaryToNum (0,lst) = hd lst | |
114 | | BinaryToNum (i,lst) = ((hd lst)*(powerOfTwo i))+BinaryToNum(i-1,tl lst) | |
115 | ||
116 | val char_list = explode s | |
117 | val BinaryList = foldr op@ [] (map (fn c => NumToBinary(ord(c),7,[])) char_list) | |
118 | val max_len = BinaryToNum(4,List.take(BinaryList,5)) | |
119 | ||
120 | fun createPair lst = (chr(BinaryToNum(7,List.take(lst,8))),BinaryToNum(max_len,List.drop(lst,8))) (*creates a pair of (char,int) from a list*) | |
121 | fun BinaryCodeToHist (lst,hist) = if (length lst)<(8+max_len) then hist (*creates a hist of (char,int) from the binary list*) | |
122 | else BinaryCodeToHist ((List.drop(lst,(8+max_len))),((createPair (List.take(lst,(8+max_len)))::hist))) | |
123 | in | |
124 | buildHuffmanTree (BinaryCodeToHist ((List.drop(BinaryList,5)),[])) | |
125 | end; |