Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Sep 4th, 2010  |  syntax: Haskell  |  size: 2.08 KB  |  views: 165  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. import Data.IORef
  2. import Network.HTTP
  3. import Network.Browser
  4. import Network.URI
  5. import Data.Maybe
  6. import Control.Monad
  7. import Data.List
  8. import Text.Regex.TDFA
  9. import Control.Concurrent
  10.  
  11. pageUrl off = URI "http:" (Just $ URIAuth "" "www.interpals.net" "") "/dosearch.php" ("?todo=search&sec=adv&age1=15&age2=18&sex[]=FEMALE&lfor[]=lfor_email&lfor[]=lfor_snail&lfor[]=lfor_langex&lfor[]=lfor_friend&lfor[]=lfor_flirt&lfor[]=lfor_relation&countries[]=AT&countries[]=DE&countries[]=CH&state=&languages[]=any&keywords=&sort=p.last_login+DESC&offset="++(show off)) ""
  12.  
  13. getPage     :: URI -> BrowserAction (HandleStream [Char]) String
  14. getPage uri = do
  15.     setErrHandler $ const $ return ()
  16.     setOutHandler $ const $ return ()
  17.     (_,s) <- request $ Request (uri) GET
  18.         [Header HdrCookie "__ubic1=MTE3ODM0NDM0MTRjN2RkYTA1OTAzMmU4LjkxODE1Njk2; __utma=46363135.421215970.1283316265.1283538085.1283541700.10; __utmz=46363135.1283316265.1.1.utmccn=(direct)|utmcsr=(direct)|utmcmd=(none); __utmc=46363135; PHPSESSID=59a130c66d4853f85289852f15cefa3a; resolution=1920x1080; ip_auto_login[login]=cap11235; ip_auto_login[password_md5]=NDM0NWM0NDlkZTg4MjRkMWVhZmJmZWNiZTQwOWQ4YTE%3D; __utmb=46363135"] ""
  19.     return $ rspBody s
  20.  
  21. getPeople :: Int -> BrowserAction (HandleStream [Char]) ([String], Int)
  22. getPeople off = do
  23.     s <- getPage (pageUrl off)
  24.     let t = (s=~"<a href='/([^?.]+)\\?")::[[String]]
  25.     let next = if length t > 0 then off+10 else 0
  26.     return (nub $ map (!!1) t, next)
  27.  
  28. personUrl :: String -> URI
  29. personUrl name = fromJust $ parseURI ("http://www.interpals.net/"++name)
  30.  
  31. viewPerson :: String -> BrowserAction (HandleStream [Char]) ()
  32. viewPerson name = do
  33.     _ <- getPage $ personUrl name
  34.     return ()
  35.  
  36. doCycle :: IORef (Int, Int) -> IO ()
  37. doCycle r = do
  38.     (count, off) <- readIORef r
  39.     (people, newOff) <- browse $  getPeople off
  40.     mapM_ (forkIO . browse . viewPerson) people
  41.     let newCount = count + (length people)
  42.     writeIORef r (newCount, if newOff<2000 then newOff else 0)
  43.     print newCount
  44.     doCycle r
  45.  
  46. main = do
  47.     t <- newIORef (0,0)
  48.     doCycle t
clone this paste RAW Paste Data