gnuvince
By: a guest | Nov 18th, 2008 | Syntax: None | Size: 1.75 KB | Hits: 328 | Expires: Never
(import '(java.net URL)
'(java.lang StringBuilder)
'(java.io BufferedReader InputStreamReader)
'(org.htmlparser Parser)
'(org.htmlparser.visitors NodeVisitor)
'(org.htmlparser.tags ImageTag))
;; Table of comics to look up. Each entry is a map with:
;;   :name   - label printed next to the result
;;   :url    - page that is downloaded / parsed
;;   :regex  - pattern used to locate the comic image
;;   :prefix - (optional) string prepended to the regex match by the default
;;             fetch-comic method; when absent, :url is used instead
;;   :type   - (optional) dispatch value selecting a fetch-comic method
(def *comics*
  [{:name   "Penny-Arcade"
    :url    "http://www.penny-arcade.com/comic/"
    :regex  #"images/\d{4}/.+?(?:png|gif|jpg)"
    :prefix "http://www.penny-arcade.com/"}

   {:name  "We The Robots"
    :url   "http://www.wetherobots.com/"
    :regex #"comics/.+?[.](?:jpg|png|gif)"}

   {:name  "Xkcd"
    :url   "http://www.xkcd.com"
    :regex #"comics"
    :type  :tooltip-comic}])
(defn fetch-url
  "Download the resource at `address` and return its contents as one string.
  The content is read line by line and the lines are concatenated with their
  line terminators removed. The underlying stream is closed before returning."
  [address]
  (with-open [in (.openStream (URL. address))]
    ;; `apply str` fully realizes the lazy line-seq while the stream is
    ;; still open, so nothing is read after with-open closes it.
    (->> (InputStreamReader. in)
         (BufferedReader.)
         (line-seq)
         (apply str))))
;; Multimethod that locates the current image for a comic entry (a map).
;; Dispatches on the entry's :type key; entries with no :type (or one that
;; has no matching method) are handled by the :default method.
(defmulti fetch-comic :type)
;; Default strategy: download the comic's page as text and search it with the
;; entry's :regex. The match is appended to :prefix when the entry has one,
;; otherwise to :url.
;;
;; Returns the resulting URL string, or nil when the regex matches nothing.
;; Previously a failed match produced the bare prefix/URL, which looked like
;; a valid result.
(defmethod fetch-comic :default [comic]
  (let [page (fetch-url (:url comic))]
    (when-let [image (re-find (:regex comic) page)]
      (str (or (:prefix comic) (:url comic))
           image))))
;; Strategy for comics whose image carries a tooltip ("title" attribute).
;; Parses the page at :url with org.htmlparser, collects every image tag whose
;; image URL matches the entry's :regex, and uses the first one found.
;;
;; Returns a vector [image-url title], or nil when no image tag matches.
;; Previously an empty result raised a NullPointerException from calling
;; .getImageURL on nil.
(defmethod fetch-comic :tooltip-comic [comic]
  (let [img-tags (ref [])
        parser   (Parser. (:url comic))
        ;; The visitor is called back for each tag while the parser walks
        ;; the document; matching image tags are accumulated in img-tags.
        visitor  (proxy [NodeVisitor] []
                   (visitTag [tag]
                     (when (and (instance? ImageTag tag)
                                (re-find (:regex comic)
                                         (.getImageURL tag)))
                       (dosync (alter img-tags conj tag)))))]
    (.visitAllNodesWith parser visitor)
    (when-let [img (first @img-tags)]
      [(.getImageURL img)
       (.getAttribute img "title")])))
;; Script entry point: for every configured comic, print its name followed by
;; whatever fetch-comic returns for it.
(doseq [{label :name :as entry} *comics*]
  (println label ":" (fetch-comic entry)))