Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- datatype HuffmanTree =
- Node of (string * int) * HuffmanTree * HuffmanTree
- | Symbol of char * int;
- local
- fun unique [] = []
- | unique (x::xs) = x:: unique (List.filter (fn y => y<>x) xs);
- fun countLetterAux (x:string,a:char,i:int) = if i=size(x) then 0 else (if String.sub(x,i)=a then 1+countLetterAux(x,a,i+1) else countLetterAux(x,a,i+1));
- fun countLetter (x:string,a:char) = if size(x)=0 then 0 else countLetterAux(x,a,0);
- in
- fun computeHistogram "" = []
- | computeHistogram s = map (fn x => (x,countLetter(s,x))) (unique(explode s))
- end;
- local
- local
- fun merge _ [] [] = []
- | merge _ list [] = list
- | merge _ [] list = list
- | merge pred (x::xs) (y::ys) = if pred(x,y) then (y::(merge pred (x::xs) ys))
- else (x::(merge pred xs (y::ys)))
- in
- fun mergesort _ [] = []
- | mergesort _ [x] = [x]
- | mergesort pred l = merge pred
- (mergesort pred (List.take (l,((length l) div 2))))
- (mergesort pred (List.drop (l,((length l) div 2))))
- end;
- fun compareTuple ((x:string,n1),(y:string,n2)) = if n1=n2 then y<x else n2<n1
- fun compareTuple2 ((x:char,n1),(y:char,n2)) = compareTuple((str(x),n1),(str(y),n2))
- fun sortAndConvert lst = map (fn (x,n) => Symbol(x,n)) (mergesort compareTuple2 lst)
- fun buildHuffmanTreeAux (Symbol(x,n1)::Symbol(y,n2)::_) = Node(((str(x)^str(y)),(n1+n2)),Symbol(x,n1),Symbol(y,n2))
- | buildHuffmanTreeAux (Node((s,n1),T1,T2)::Symbol(y,n2)::_) = if compareTuple((s,n1),(str(y),n2))
- then Node((str(y)^s,n1+n2),Symbol(y,n2),Node((s,n1),T1,T2))
- else Node((s^str(y),n1+n2),Node((s,n1),T1,T2),Symbol(y,n2))
- fun buildHuffmanList (x::[]) = x
- | buildHuffmanList lst = buildHuffmanList((buildHuffmanTreeAux (List.take(lst,2)))::(List.drop(lst,2)))
- in
- fun buildHuffmanTree lst = buildHuffmanList (sortAndConvert lst)
- end;
- fun encodeMessage tree s =
- let
- fun LetterEncodingHist (Symbol(x,n),s) = [(x,s)]
- | LetterEncodingHist (Node(_,T1,T2),s) = (LetterEncodingHist (T1,s^"0"))@(LetterEncodingHist (T2,s^"1"))
- fun LetterEncoding ([],_) = ""
- | LetterEncoding ((y,s)::lst,x) = if y=x then s else LetterEncoding (lst,x)
- val hist = LetterEncodingHist(tree,"")
- in
- foldr op^ "" (map (fn c => LetterEncoding(hist,c)) (explode s))
- end;
- local
- fun decodeMessageAux (Symbol(a,n)) _ [] = str(a)
- | decodeMessageAux (Symbol(a,n)) T2 lst = str(a)^(decodeMessageAux T2 T2 lst)
- | decodeMessageAux (Node((x,n),TL,TR)) T2 lst = if (hd lst) = #"0" then decodeMessageAux TL T2 (tl lst)
- else decodeMessageAux TR T2 (tl lst)
- in
- fun decodeMessage tree s = decodeMessageAux tree tree (explode s)
- end;
- fun encodeTree tree =
- let
- (*convert positive base10 number into binary number*)
- fun num2binary_aux(0,false) = "0"
- | num2binary_aux(0,true) = ""
- | num2binary_aux(n,_) = if (n mod 2) = 0 then num2binary_aux((n div 2),true)^"0"
- else num2binary_aux((n div 2),true)^"1"
- fun num2binary 0 = num2binary_aux(0,false)
- | num2binary n = num2binary_aux(n,true)
- (*convert the char into binary number which is the ascii encoding*)
- fun char2binary c = num2binary(ord(c))
- (*find the max appearance of a char in huffman tree*)
- fun maxAppearance (Symbol(x,n)) = n
- | maxAppearance (Node((_,_),Symbol(_,n),_)) = n
- | maxAppearance (Node((_,_),_,Symbol(_,n))) = n
- (*create histogram of the letter appearances*)
- fun LetterEncodingHist (Symbol(x,n)) = [(x,n)]
- | LetterEncodingHist (Node(_,T1,T2)) = (LetterEncodingHist T1)@(LetterEncodingHist T2)
- (*add Zeros in the start of the list untill the length of the list is equal to the given number*)
- fun addZeros(n,lst) = if length(lst) = n then lst else #"0"::addZeros(n-1,lst)
- (*calculate the ceiling of log of the number*)
- fun log2 0 = 0 | log2 n = 1 + log2(n div 2)
- val max_app = maxAppearance(tree)
- (*save the number of bits needed to encode the number of char appearances*)
- val max_app_encode = addZeros(5,explode(num2binary(log2(max_app))))
- (*encode the first var of the Tuple to it binary which represent the ascii of the char and the appearance of the char to binary*)
- fun encode_tuple (x,y) = addZeros(8,explode(char2binary(x))) @ (addZeros(log2(max_app),explode(num2binary(y))))
- (* add zeros at the end that the list length will be multiply of 8*)
- fun append_to_complete lst = if (length(lst) mod 8) = 0 then lst else append_to_complete(lst@[#"0"])
- fun binary_to_num [] _ = 0
- | binary_to_num (x::xs) m = if x = #"1" then (m + (binary_to_num xs (2*m))) else (binary_to_num xs (2*m))
- fun encode_list [] _= ""
- | encode_list (x::xs) temp = if length(temp) = 8 then str(chr(binary_to_num temp 1))^(encode_list xs [x])
- else (encode_list xs (temp@[x]))
- in
- encode_list (append_to_complete(max_app_encode @ (foldr op@ [] (map encode_tuple LetterEncodingHist(tree))))) []
- end;
- fun decodeHuffmanTree s =
- let
- fun powerOfTwo 0 = 1
- | powerOfTwo n = 2*(powerOfTwo n-1)
- fun NumToBinary (_,~1,lst) = lst (*convert char to binary code list*)
- | NumToBinary (0,i,lst) = NumToBinary (0,i-1,lst@[0])
- | NumToBinary (n,i,lst) =
- let
- val power = powerOfTwo i
- in
- if n>=power then NumToBinary(n-power,i-1,lst@[1]) else NumToBinary(n,i-1,lst@[0])
- end
- fun BinaryToNum (0,lst) = hd lst
- | BinaryToNum (i,lst) = ((hd lst)*(powerOfTwo i))+BinaryToNum(i-1,tl lst)
- val char_list = explode s
- val BinaryList = foldr op@ [] (map (fn c => NumToBinary(ord(c),7,[])) char_list)
- val max_len = BinaryToNum(4,List.take(BinaryList,5))
- fun createPair lst = (chr(BinaryToNum(7,List.take(lst,8))),BinaryToNum(max_len,List.drop(lst,8))) (*creates a pair of (char,int) from a list*)
- fun BinaryCodeToHist (lst,hist) = if (length lst)<(8+max_len) then hist (*creates a hist of (char,int) from the binary list*)
- else BinaryCodeToHist (List.drop(lst,(8+max_len))) ((createPair List.take(lst,(8+max_len)))::hist)
- in
- buildHuffmanTree (BinaryCodeToHist List.drop(BinaryList,5) [])
- end;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement