- (import '(java.net URL)
- '(java.lang StringBuilder)
- '(java.io BufferedReader InputStreamReader)
- '(org.htmlparser Parser)
- '(org.htmlparser.visitors NodeVisitor)
- '(org.htmlparser.tags ImageTag))
;; Comics to fetch. Each entry is a map with:
;;   :name   - label printed next to the result
;;   :url    - page that is downloaded / parsed
;;   :regex  - pattern locating the comic image in that page
;;   :prefix - (optional) base prepended to the regex match instead of :url
;;   :type   - (optional) fetch-comic dispatch value; absent means :default
(def *comics*
  [{:name   "Penny-Arcade"
    :url    "http://www.penny-arcade.com/comic/"
    :prefix "http://www.penny-arcade.com/"
    :regex  #"images/\d{4}/.+?(?:png|gif|jpg)"}

   {:name  "We The Robots"
    :url   "http://www.wetherobots.com/"
    :regex #"comics/.+?[.](?:jpg|png|gif)"}

   ;; Dispatches to the :tooltip-comic method of fetch-comic.
   {:name  "Xkcd"
    :url   "http://www.xkcd.com"
    :type  :tooltip-comic
    :regex #"comics"}])
(defn fetch-url
  "Return the web page at `address` as a single string.

  address - a URL string accepted by the java.net.URL constructor.

  The content is decoded as UTF-8 (rather than the platform default
  charset) and its lines are concatenated with the line terminators
  removed. Exceptions raised while constructing the URL or reading from
  it propagate to the caller."
  [address]
  ;; with-open closes the reader first, then the stream, even on error.
  (with-open [stream (.openStream (URL. address))
              reader (BufferedReader. (InputStreamReader. stream "UTF-8"))]
    ;; line-seq is lazy; apply str fully realizes it before the reader closes.
    (apply str (line-seq reader))))
;; Locate the current image for a comic map. Dispatches on the map's :type
;; entry; maps without a matching method fall through to :default.
(defmulti fetch-comic :type)

;; Default strategy: download the page at (:url comic), find the first match
;; of (:regex comic) in its text, and return that match appended to
;; (:prefix comic) when present, otherwise to (:url comic).
;; Returns nil when the regex does not match, instead of returning the bare
;; prefix/URL as though it were an image address.
(defmethod fetch-comic :default [comic]
  (let [page (fetch-url (:url comic))]
    (when-let [image (re-find (:regex comic) page)]
      (str (or (:prefix comic) (:url comic))
           image))))
;; Strategy for comics whose image carries hover text in its title attribute.
;; Parses the page at (:url comic) with org.htmlparser, collects every
;; ImageTag whose image URL matches (:regex comic), and returns a vector
;; [image-url title] for the first one collected.
;; Returns nil when no image tag matches (the original threw a
;; NullPointerException by calling .getImageURL on nil).
(defmethod fetch-comic :tooltip-comic [comic]
  (let [;; An atom is enough here: a single independent accumulator, no
        ;; coordinated transaction is involved.
        matches (atom [])
        parser  (Parser. (:url comic))
        visitor (proxy [NodeVisitor] []
                  (visitTag [tag]
                    (when (and (instance? ImageTag tag)
                               (re-find (:regex comic) (.getImageURL tag)))
                      (swap! matches conj tag))))]
    ;; Walks the document, invoking visitTag for the tags it encounters.
    (.visitAllNodesWith parser visitor)
    (when-let [img (first @matches)]
      [(.getImageURL img)
       (.getAttribute img "title")])))
;; Script entry point: for every configured comic, print its name followed by
;; whatever fetch-comic returns for it.
(doseq [{title :name :as entry} *comics*]
  (println title ":" (fetch-comic entry)))
Posted by gnuvince on Wed 19 Nov 03:11
report abuse | View followups from Anonymous | download | new post
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.