Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import Control.Monad
- import Network.HTTP
- import Text.Regex.Posix
- import Text.HTML.TagSoup
- import Data.List
- data Episode = Episode {
- episodeRunning :: Maybe String,
- episodeSeason :: Maybe String,
- title :: Maybe String,
- airDate :: Maybe String
- }
- deriving Show
- data Season = Season {
- number :: Maybe String,
- episodes :: [Maybe Episode]
- }
- deriving Show
- data Series = Series [Maybe Season]
- deriving Show
- (!!!) :: [a] -> Int -> Maybe a
- list !!! index
- | index < length list = Just $ list !! index
- | otherwise = Nothing
- parseSeries :: [Tag String] -> Series
- parseSeries tags = Series $ parseSeason tags
- parseSeason :: [Tag String] -> [Maybe Season]
- parseSeason (x:xs) = (\episodes -> [Just $ Season ((extract !!! 0) >>= (!!! 0)) (fst episodes)] ++ parseSeason (snd episodes)) $ parseEpisodes ([],xs)
- where extract = take 1 $ drop 1 $ nub $ (fromAttrib "id" x) =~ "[0-9]*" :: [[String]]
- parseSeason [] = []
- parseEpisodes :: ([Maybe Episode], [Tag String]) -> ([Maybe Episode], [Tag String])
- parseEpisodes (episodes,tags) =
- case (take 1 tags) of
- [TagOpen "Span" _] -> (episodes,tags)
- [TagText a] -> parseEpisodes $ (\parsed -> (episodes ++ [fst parsed], snd parsed)) $ parseEpisode tags
- _ -> (episodes,tags)
- parseEpisode :: [Tag String] -> (Maybe Episode, [Tag String])
- parseEpisode tags = (Just $ Episode episodeRunning episodeSeason episodeTitle episodeAirdate, tagList)
- where episodeRunning = extractTagText (take 1 tags)
- episodeSeason = extractTagText (take 1 $ drop 1 tags)
- episodeTitle = extractTitle (take 1 $ drop 2 tags)
- episodeAirdate = extractTagText (take 1 $ drop 3 tags)
- tagList = drop 4 tags
- extractFromAttrib :: Tag String -> Maybe String
- extractFromAttrib tag = (Just $ fromAttrib "title" tag)
- extractTagText :: [Tag String] -> Maybe String
- extractTagText [] = Nothing
- extractTagText (x:_) = maybeTagText x
- extractTitle :: [Tag String] -> Maybe String
- extractTitle (x:xs) =
- case x of
- TagText t -> maybeTagText x
- TagOpen a b -> extractFromAttrib x
- fetchPage :: String -> IO String
- fetchPage page = simpleHTTP (getRequest page) >>= getResponseBody
- getTags :: [Tag String] -> [Tag String]
- getTags (x:xs)
- | (~== (TagOpen "span" [("class","mw-headline")])) x = case ((fromAttrib "id" x) =~ "Season" :: Bool) of True -> [x] ++ getTags xs
- False -> getTags xs
- | (~== (TagOpen "td" [("id","")])) x = case ((fromAttrib "id" x) =~ "ep" :: Bool) of True -> (fst parse) ++ (getTags $ snd parse)
- False -> getTags xs
- |otherwise = getTags xs
- where parse = takeState 1 $ dropWhileState (~/= (TagOpen "span" [("class","bday dtstart published updated")])) $ takeState 1 $ dropState 2 $ dropWhileState (~/= (TagOpen "td" [("class","summary")])) $ takeState 1 $ dropWhileState(~/= (TagOpen "td" [])) ((take 1 xs),(drop 1 xs))
- getTags [] = []
- --dropState :: Int -> ([Tag str],[Tag str]) -> ([Tag str],[Tag str])
- dropState n (a,b)
- |n > 0 = (a, (drop n b))
- |otherwise = (a,b)
- --takeState :: Int -> ([Tag str],[Tag str]) -> ([Tag str],[Tag str])
- takeState n (a,b)
- |n > 0 = (a ++ (take n b), drop n b)
- |otherwise = (a,b)
- dropWhileState f (a,b) = (a,drop 1 $ dropWhile f b)
- main = do
- tags <- liftM parseTags $ fetchPage "http://en.wikipedia.org/wiki/List_of_Weeds_episodes"
- putStrLn $ show $ parseSeries $ getTags tags
- --putStrLn $ show tags
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement