SHARE
TWEET

too lazy/too strict

a guest Sep 3rd, 2010 33 Never
import Data.Word
import Text.Read (readMaybe)

import qualified Data.ByteString.Lazy as B
import Data.List.Split
import Text.HTML.TagSoup
  5.  
  6. bsFromString :: [String] -> B.ByteString
  7. bsFromString = B.pack . map (read . ("0x"++))
  8.  
  9. soupToChunks :: String -> [String]
  10. soupToChunks = filter len2 . wordsBy (`elem` " \n") . fromTagText . getPreText
  11.  
  12. len2 (_:_:[]) = True
  13. len2 _ = False
  14.  
  15. getPreText = (!! 1) . last . partitions (isTagOpenName "pre") . parseTags
  16.  
  17. main = do
  18.   f <- readFile "Cache entry information.xhtml"
  19.   B.writeFile "test" . bsFromString . soupToChunks $ f
RAW Paste Data
Top