import Data.List (sortBy) import Data.Char (digitToInt, chr, ord) probabilities "" = [] probabilities (x:xs) = (x, length $ filter (== x) (x:xs)) : probabilities (filter (/= x) xs) data HTree weight char = EmptyHTree | Leaf weight char | Branch weight (HTree weight char) (HTree weight char) deriving (Show, Read) weight (EmptyHTree) = 0 weight (Leaf w _) = w weight (Branch w _ _) = w compareHTree t1 t2 = compare (weight t1) (weight t2) insert _ x [] = [x] insert predicate x (y:ys) | predicate x y = (x:y:ys) | otherwise = y:(insert predicate x ys) _buildHTree [] = EmptyHTree _buildHTree [x] = x _buildHTree (x:y:xs) = _buildHTree $ insert (\x y -> (compareHTree x y) == LT) (Branch ((weight x) + (weight y)) x y) xs buildHTree s = _buildHTree $ sortBy compareHTree $ map (\x -> Leaf (snd x) (fst x)) $ probabilities s parseHTree prefix EmptyHTree = [] parseHTree prefix (Leaf _ c) = [(prefix, c)] parseHTree prefix (Branch _ t1 t2) = (parseHTree (prefix ++ "0") t1) ++ (parseHTree (prefix ++ "1") t2) buildHuffmanCodeTable s = parseHTree "" $ buildHTree s huffmanCode s = bitsToString $ concat $ map (\x -> fst $ head $ filter (\y -> snd y == x) (buildHuffmanCodeTable s)) s where bitsToString s | s == "" = "" | length s < 8 = bitsToString $ take 8 (s ++ "00000000") | otherwise = (byteToChar $ take 8 s) : bitsToString (drop 8 s) where byteToChar s = chr $ foldl (\x y -> 2 * x + (digitToInt y)) 0 s huffmanDecode probs s len = take len $ _huffmanDecode codeTable "" $ stringToBits s where codeTable = parseHTree "" $ _buildHTree $ sortBy compareHTree $ map (\x -> Leaf (snd x) (fst x)) $ probs _huffmanDecode codes prefix "" | has prefix (map fst codes) = [snd $ head $ filter (\x -> fst x == prefix) codes] | otherwise = "" _huffmanDecode codes prefix s | has prefix (map fst codes) = (snd $ head $ filter (\x -> fst x == prefix) codes) : _huffmanDecode codes "" s | otherwise = _huffmanDecode codes (prefix ++ [head s]) (tail s) has x l = (length $ (filter (== x) l)) > 0 stringToBits "" = "" stringToBits (x:xs) = (charToByte x) ++ stringToBits xs where charToByte c = reverse $ take 8 $ ( _charToByte $ ord c) ++ "00000000" where _charToByte _c | _c == 0 = "" | mod _c 2 == 1 = '1' : (_charToByte (div _c 2)) | otherwise = '0' : (_charToByte (div _c 2))