import Data.List.Split
import Data.Word
import Text.HTML.TagSoup
import Text.Read (readMaybe)
import qualified Data.ByteString.Lazy as B

-- | Pack a list of hexadecimal byte tokens (for example @"1f"@) into a lazy
-- 'B.ByteString', one byte per token, in order.
--
-- Every token is parsed as the literal @0x\<token\>@ into a 'Word8'. A token
-- that does not parse aborts with an 'error' that names the offending token
-- (instead of the uninformative @Prelude.read: no parse@).
bsFromString :: [String] -> B.ByteString
bsFromString = B.pack . map hexByte
  where
    hexByte :: String -> Word8
    hexByte chunk =
      case readMaybe ("0x" ++ chunk) of
        Just byte -> byte
        Nothing ->
          error ("bsFromString: not a hexadecimal byte: " ++ show chunk)

-- | Extract the two-character tokens from the text selected by 'getPreText'.
--
-- The text is split on spaces and newlines only; every resulting word whose
-- length is not exactly two is discarded.
--
-- NOTE(review): tokens are selected by length alone, so any two-character
-- word in the text is kept, hexadecimal or not -- confirm this matches the
-- input format.
soupToChunks :: String -> [String]
soupToChunks = filter len2 . wordsBy (`elem` " \n") . fromTagText . getPreText

-- | 'True' exactly when the list has two elements.
len2 :: [a] -> Bool
len2 [_, _] = True
len2 _ = False

-- | Return the tag that immediately follows the last @\<pre\>@ opening tag of
-- the given markup.
--
-- Aborts with a descriptive 'error' when the markup contains no @\<pre\>@
-- opening tag, or when nothing follows the last one.
getPreText :: String -> Tag String
getPreText markup =
  case reverse (partitions (isTagOpenName "pre") (parseTags markup)) of
    [] -> error "getPreText: no <pre> element found"
    -- Each partition starts with the matching <pre> tag itself, so the
    -- element of interest is the second one.
    ((_ : firstInside : _) : _) -> firstInside
    _ -> error "getPreText: nothing follows the last <pre> tag"

-- | Read @Cache entry information.xhtml@ from the current directory, decode
-- the two-character hexadecimal tokens found after its last @\<pre\>@ tag, and
-- write the resulting bytes to the file @test@.
main :: IO ()
main = do
  f <- readFile "Cache entry information.xhtml"
  B.writeFile "test" . bsFromString . soupToChunks $ f