Share Pastebin
Guest
Public paste!

gnuvince

By: a guest | Nov 18th, 2008 | Syntax: None | Size: 1.75 KB | Hits: 328 | Expires: Never
Copy text to clipboard
  1. (import '(java.net URL)
  2.         '(java.lang StringBuilder)
  3.         '(java.io BufferedReader InputStreamReader)
  4.         '(org.htmlparser Parser)
  5.         '(org.htmlparser.visitors NodeVisitor)
  6.         '(org.htmlparser.tags ImageTag))
  7.  
  ;; Registry of the web comics this script checks.  Each entry is a map:
  ;;   :name    label printed next to the result
  ;;   :url     page that is downloaded / parsed
  ;;   :regex   pattern used with re-find to locate the comic image
  ;;   :prefix  (optional) string prepended to the regex match by the
  ;;            :default fetch-comic method; when absent, :url is used
  ;;   :type    (optional) dispatch value for the fetch-comic multimethod
  (def *comics*
    [{:name   "Penny-Arcade"
      :url    "http://www.penny-arcade.com/comic/"
      :regex  #"images/\d{4}/.+?(?:png|gif|jpg)"
      :prefix "http://www.penny-arcade.com/"}

     {:name  "We The Robots"
      :url   "http://www.wetherobots.com/"
      :regex #"comics/.+?[.](?:jpg|png|gif)"}

     {:name  "Xkcd"
      :url   "http://www.xkcd.com"
      :regex #"comics"
      :type  :tooltip-comic}])
  24.  
  (defn fetch-url
    "Download the resource at `address` (a URL string) and return its
    entire body as a single string.

    The body is returned verbatim, line terminators included.  The
    previous implementation concatenated the result of line-seq, which
    silently dropped every newline and fused text from adjacent lines.

    Bytes are decoded as UTF-8 rather than with the JVM's platform
    default charset, so the result does not vary between machines.

    Throws java.net.MalformedURLException for an invalid address and
    java.io.IOException if the resource cannot be opened or read."
    [address]
    (with-open [stream (.openStream (URL. address))]
      (let [reader (BufferedReader. (InputStreamReader. stream "UTF-8"))
            sb     (StringBuilder.)]
        ;; .read returns -1 at end of stream, otherwise the next char code.
        (loop [c (.read reader)]
          (if (neg? c)
            (.toString sb)
            (do
              (.append sb (char c))
              (recur (.read reader))))))))
  32.  
  ;; fetch-comic locates the current image for one comic map.  It
  ;; dispatches on the map's :type entry; maps without a :type (or with a
  ;; :type that has no method) are handled by the :default method.
  (defmulti fetch-comic :type)

  ;; Default strategy: download the comic's :url, find the first match of
  ;; its :regex in the page text, and return that match prefixed with
  ;; :prefix (or with :url when the map has no :prefix).
  ;; Returns nil when the regex matches nothing.
  (defmethod fetch-comic :default [comic]
    (let [src   (fetch-url (:url comic))
          image (re-find (:regex comic) src)]
      ;; Without this guard a page with no match yielded (str prefix nil),
      ;; i.e. the bare prefix/URL, which looks like a valid result but
      ;; points at no image.
      (when image
        (str (or (:prefix comic) (:url comic))
             image))))
  40.  
  ;; Strategy for comics whose image carries a tooltip: parse the page at
  ;; the comic's :url with the HTML parser, collect every image tag whose
  ;; image URL matches the comic's :regex, and return a two-element vector
  ;; [image-url title-attribute] for the first such tag.
  ;; Returns nil when no image tag matches.
  (defmethod fetch-comic :tooltip-comic [comic]
    (let [img-tags (ref [])
          parser   (Parser. (:url comic))
          visitor  (proxy [NodeVisitor] []
                     (visitTag [tag]
                       (when (and (instance? ImageTag tag)
                                  (re-find (:regex comic)
                                           (.getImageURL tag)))
                         ;; Accumulate matches in visit order.
                         (dosync (alter img-tags conj tag)))))]
      (.visitAllNodesWith parser visitor)
      ;; Previously (first @img-tags) was dereferenced unconditionally, so
      ;; a page with no matching image raised a NullPointerException.
      (let [tag (first @img-tags)]
        (when tag
          [(.getImageURL tag)
           (.getAttribute tag "title")]))))
  53.  
  ;; Script entry point: look up every registered comic in turn and print
  ;; one line per comic in the form "<name> : <result of fetch-comic>".
  (doseq [{title :name :as entry} *comics*]
    (let [result (fetch-comic entry)]
      (println title ":" result)))