Advertisement
Guest User

tagsip

a guest
May 6th, 2012
194
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import Control.Monad
  2. import Network.HTTP
  3. import Text.Regex.Posix
  4. import Text.HTML.TagSoup
  5. import Data.List
  6.  
  7. data Episode = Episode {
  8.   episodeRunning :: Maybe String,
  9.   episodeSeason :: Maybe String,
  10.   title :: Maybe String,
  11.   airDate :: Maybe String
  12.   }
  13.              deriving Show
  14.                      
  15. data Season = Season {
  16.   number :: Maybe String,
  17.   episodes :: [Maybe Episode]
  18.   }
  19.              deriving Show
  20.                      
  21. data Series = Series [Maybe Season]
  22.               deriving Show
  23.                        
  24. (!!!) :: [a] -> Int -> Maybe a
  25. list !!! index
  26.   | index < length list = Just $ list !! index
  27.   | otherwise = Nothing
  28.  
  29. parseSeries :: [Tag String] -> Series
  30. parseSeries tags = Series $ parseSeason tags
  31.  
  32. parseSeason :: [Tag String] -> [Maybe Season]
  33. parseSeason (x:xs) = (\episodes -> [Just $ Season ((extract !!! 0) >>= (!!! 0)) (fst episodes)] ++ parseSeason (snd episodes)) $ parseEpisodes ([],xs)
  34.   where extract = take 1 $ drop 1 $ nub $ (fromAttrib "id" x) =~ "[0-9]*" :: [[String]]
  35. parseSeason []     = []
  36.  
  37.  
  38.  
  39.                            
  40. parseEpisodes :: ([Maybe Episode], [Tag String]) -> ([Maybe Episode], [Tag String])
  41. parseEpisodes (episodes,tags) =
  42.   case (take 1 tags) of
  43.     [TagOpen "Span" _] -> (episodes,tags)
  44.     [TagText a] -> parseEpisodes $ (\parsed -> (episodes ++ [fst parsed], snd parsed)) $ parseEpisode tags
  45.     _ -> (episodes,tags)
  46.  
  47.              
  48. parseEpisode :: [Tag String] -> (Maybe Episode, [Tag String])
  49. parseEpisode tags = (Just $ Episode episodeRunning episodeSeason episodeTitle episodeAirdate, tagList)
  50.   where episodeRunning = extractTagText (take 1 tags)
  51.         episodeSeason  = extractTagText (take 1 $ drop 1 tags)
  52.         episodeTitle   = extractTitle (take 1 $ drop 2 tags)
  53.         episodeAirdate = extractTagText (take 1 $ drop 3 tags)
  54.         tagList        = drop 4 tags
  55.  
  56. extractFromAttrib :: Tag String -> Maybe String
  57. extractFromAttrib tag = (Just $ fromAttrib "title" tag)
  58.                      
  59.                        
  60. extractTagText :: [Tag String] -> Maybe String
  61. extractTagText [] = Nothing
  62. extractTagText (x:_) = maybeTagText x
  63.                        
  64. extractTitle :: [Tag String] -> Maybe String
  65. extractTitle (x:xs) =
  66.   case x of
  67.     TagText t -> maybeTagText x
  68.     TagOpen a b -> extractFromAttrib x
  69.  
  70. fetchPage :: String -> IO String
  71. fetchPage page = simpleHTTP (getRequest page) >>= getResponseBody
  72.  
  73. getTags :: [Tag String] -> [Tag String]
  74. getTags (x:xs)
  75.   | (~== (TagOpen "span" [("class","mw-headline")])) x = case ((fromAttrib "id" x) =~ "Season" :: Bool) of True ->  [x] ++ getTags xs
  76.                                                                                                            False -> getTags xs
  77.                                                                                                            
  78.   | (~== (TagOpen "td" [("id","")])) x = case ((fromAttrib "id" x) =~ "ep" :: Bool) of True -> (fst parse) ++ (getTags $ snd parse)
  79.                                                                                        False -> getTags xs
  80.   |otherwise = getTags xs                                    
  81.              where parse = takeState 1 $ dropWhileState (~/= (TagOpen "span" [("class","bday dtstart published updated")])) $ takeState 1  $ dropState 2  $ dropWhileState (~/= (TagOpen "td" [("class","summary")])) $ takeState 1 $ dropWhileState(~/= (TagOpen "td" [])) ((take 1 xs),(drop 1 xs))
  82. getTags [] = []
  83.  
  84.  
  85. --dropState :: Int -> ([Tag str],[Tag str]) -> ([Tag str],[Tag str])
  86. dropState n (a,b)
  87.   |n > 0  = (a, (drop n b))
  88.   |otherwise = (a,b)
  89.  
  90. --takeState :: Int -> ([Tag str],[Tag str]) -> ([Tag str],[Tag str])
  91. takeState n (a,b)
  92.   |n > 0 = (a ++ (take n b), drop n b)
  93.   |otherwise = (a,b)  
  94.                
  95. dropWhileState f (a,b) = (a,drop 1 $ dropWhile f b)
  96.  
  97. main = do
  98.   tags <- liftM parseTags $ fetchPage "http://en.wikipedia.org/wiki/List_of_Weeds_episodes"
  99.   putStrLn $ show $ parseSeries $ getTags tags
  100.   --putStrLn $ show tags
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement