(defpackage danbooru
  (:use :cl :lquery))
(in-package :danbooru)

;; Requires: lQuery, drakma, alexandria, cl-ppcre
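;; Assuming Quicklisp is available, the dependencies can be pulled in with
;;   (ql:quickload '(:lquery :drakma :alexandria :cl-ppcre))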

;; Base URL; every link scraped below is relative to it.
(defvar *url* "http://www.donmai.us")

;; Applies every (regex . replacement) pair in REPLACE-ALIST to TARGET, in order.
(defun replace-many (target replace-alist)
  (loop for replace in replace-alist
        do (setf target (cl-ppcre:regex-replace-all (car replace) target (cdr replace))))
  target)

;; Euch. Necessary to stop the HTML4 parser from stripping away the HTML5
;; <section> and <article> tags: they are rewritten into <div>s before parsing.
(defun 2html4 (html)
  (replace-many
   html
   `(("<section" . "<div data-html5=\"section\"")
     ("</section" . "</div")
     ("<article" . "<div data-html5=\"article\"")
     ("</article" . "</div"))))

;; Walks every page of the pool and collects the relative URLs of the
;; individual post pages.
(defun crawl-pool-links (pool-id)
  ;; Bind the master document to NIL so each call parses into its own document.
  (let ((lquery:*lquery-master-document*))
    (loop with image-urls = ()
          ;; Start on the pool's first page, then follow the paginator's
          ;; "next" link until there is none.
          for next = (format NIL "/pools/~d" pool-id)
            then ($ ".paginator a[rel=\"next\"]" (attr :href) (node))
          while next
          do (format T "> Loading page ~a~a~%" *url* next)
             ($ (initialize (2html4 (drakma:http-request (concatenate 'string *url* next))) :type :HTML))
             (setf image-urls (append image-urls ($ "#c-pools .post-preview a" (attr :href))))
          finally (return image-urls))))

;; Fetches a single post page and returns the URL of the full-size image.
(defun get-image-url (image-page-url)
  (let ((lquery:*lquery-master-document*))
    ($ (initialize (2html4 (drakma:http-request image-page-url)) :type :HTML))
    ;; Prefer the "view original" link shown for resized images,
    ;; otherwise fall back to the displayed image's source.
    (or ($ "#image-resize-notice a" (attr :href) (node))
        ($ "#image" (attr :src) (node)))))

;; Maps every post page in the pool to its full-size image URL.
(defun crawl-pool-image-urls (pool-id)
  (let ((lquery:*lquery-master-document*))
    (mapcar #'(lambda (page)
                (format T "> Loading page ~a~a~%" *url* page)
                (get-image-url (concatenate 'string *url* page)))
            (crawl-pool-links pool-id))))

;; Streams URL into PATHNAME as raw bytes.
(defun download-file (url pathname &key (if-exists :error))
  (with-open-file (output pathname
                          :direction :output :element-type '(unsigned-byte 8)
                          :if-does-not-exist :create :if-exists if-exists)
    (let ((input (drakma:http-request url :want-stream T)))
      (unwind-protect
           (alexandria:copy-stream input output :element-type '(unsigned-byte 8))
        ;; Close the response stream so connections are not leaked.
        (close input)))))
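;; Quick standalone use (URL and path are just examples):
;;   (download-file "http://www.donmai.us/favicon.ico" #p"/tmp/favicon.ico"
;;                  :if-exists :supersede)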

;; Entry point: downloads every image in the pool into TARGET-DIRECTORY.
;; FILE-PREFIX is a FORMAT control string used to number the files sequentially.
(defun download-pool (pool-id target-directory &optional (file-prefix "~d-"))
  (loop with urls = (crawl-pool-image-urls pool-id)
        for i from 1
        for url in urls
        ;; File name: numbered prefix + whatever follows the last slash of the image URL.
        for pathname = (merge-pathnames (concatenate 'string (format NIL file-prefix i)
                                                     (subseq url (1+ (position #\/ url :from-end T))))
                                        target-directory)
        do (format T "> [~3d/~d] Downloading ~a~a to ~a~%" i (length urls) *url* url pathname)
           ;; Skip over individual failures instead of aborting the whole pool.
           (ignore-errors (download-file (concatenate 'string *url* url) pathname))))
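
;; A minimal usage sketch; the pool id and directory here are made up:
;;   (download-pool 1234 #p"/tmp/danbooru-pool/")
;; This fetches every image in pool 1234 and writes files such as
;; /tmp/danbooru-pool/1-<original file name>.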