Simple Danbooru Pool Crawler
Shinmera, Dec 25th, 2013

(defpackage danbooru
  (:use :cl :lquery))
(in-package :danbooru)

;; Requires: lQuery, drakma, alexandria, cl-ppcre

(defvar *url* "http://www.donmai.us")

;; Run every (REGEX . REPLACEMENT) pair in REPLACE-ALIST over TARGET, in order.
(defun replace-many (target replace-alist)
  (loop for replace in replace-alist
        do (setf target (cl-ppcre:regex-replace-all (car replace) target (cdr replace))))
  target)
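
;; A quick illustration of REPLACE-MANY (hypothetical input, not part of the
;; crawler itself):
;;   (replace-many "a-b c-d" '(("-" . "_") (" " . "+"))) ;=> "a_b+c_d"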

;; Euch. Necessary to stop the HTML4 parser from stripping away the HTML5 tags.
(defun 2html4 (html)
  (replace-many
   html
   `(("<section" . "<div data-html5=\"section\"")
     ("</section" . "</div")
     ("<article" . "<div data-html5=\"article\"")
     ("</article" . "</div"))))
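
;; For illustration, on a made-up snippet the rewrite yields
;;   (2html4 "<section><article>hi</article></section>")
;;   ;=> "<div data-html5=\"section\"><div data-html5=\"article\">hi</div></div>"
;; which the HTML4 parser then leaves intact.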

;; Walk every page of the pool, following the paginator's rel="next" link,
;; and collect the (relative) links to the individual post pages.
(defun crawl-pool-links (pool-id)
  (let ((lquery:*lquery-master-document*))
    (loop
     with image-urls = ()
     for next = (format NIL "/pools/~d" pool-id) then ($ ".paginator a[rel=\"next\"]" (attr :href) (node))
     while next
     do (format T "> Loading page ~a~a~%" *url* next)
     ($ (initialize (2html4 (drakma:http-request (concatenate 'string *url* next))) :type :HTML))
     (setf image-urls (append image-urls ($ "#c-pools .post-preview a" (attr :href))))
     finally (return image-urls))))
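
;; Sketch of a call; the pool id and returned paths are made up:
;;   (crawl-pool-links 42) ;=> ("/posts/1001" "/posts/1002" ...)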

;; Fetch a single post page and extract the direct image URL, preferring the
;; full-size link in the resize notice over the displayed image's src.
(defun get-image-url (image-page-url)
  (let ((lquery:*lquery-master-document*))
    ($ (initialize (2html4 (drakma:http-request image-page-url)) :type :HTML))
    (or ($ "#image-resize-notice a" (attr :href) (node))
        ($ "#image" (attr :src) (node)))))
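
;; Example with a hypothetical post page; returns NIL when neither selector
;; matches anything:
;;   (get-image-url "http://www.donmai.us/posts/1001")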

;; Resolve every post page in the pool to its direct image URL.
(defun crawl-pool-image-urls (pool-id)
  (let ((lquery:*lquery-master-document*))
    (mapcar #'(lambda (page)
                (format T "> Loading page ~a~a~%" *url* page)
                (get-image-url (concatenate 'string *url* page)))
            (crawl-pool-links pool-id))))
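
;; E.g. (illustrative pool id):
;;   (crawl-pool-image-urls 42) ;=> one direct image URL per post in the pool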

;; Download URL to PATHNAME as a binary file, making sure the response
;; stream is closed again.
(defun download-file (url pathname &key (if-exists :error))
  (with-open-file (output pathname
                          :direction :output :element-type '(unsigned-byte 8)
                          :if-does-not-exist :create :if-exists if-exists)
    (let ((input (drakma:http-request url :want-stream T)))
      (unwind-protect
           (alexandria:copy-stream input output :element-type '(unsigned-byte 8))
        (close input)))))
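
;; A minimal sketch with a hypothetical URL and target path:
;;   (download-file "http://www.donmai.us/data/some-image.jpg"
;;                  #p"/tmp/some-image.jpg" :if-exists :supersede)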

;; Download the whole pool into TARGET-DIRECTORY, numbering the files with
;; FILE-PREFIX (a FORMAT string receiving the running index).
(defun download-pool (pool-id target-directory &optional (file-prefix "~d-"))
  (loop with urls = (crawl-pool-image-urls pool-id)
        for i from 1
        for url in urls
        for pathname = (merge-pathnames (concatenate 'string (format NIL file-prefix i)
                                                     (subseq url (1+ (position #\/ url :from-end T)))) target-directory)
        do (format T "> [~3d/~d] Downloading ~a~a to ~a~%" i (length urls) *url* url pathname)
        (ignore-errors (download-file (concatenate 'string *url* url) pathname))))
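
;; Putting it all together; the pool id and directory are assumptions for the
;; example, and the target directory has to exist beforehand:
;;   (ensure-directories-exist #p"/tmp/pool-42/")
;;   (download-pool 42 #p"/tmp/pool-42/")
;; This produces files like /tmp/pool-42/1-foo.jpg, /tmp/pool-42/2-bar.jpg, ...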