Advertisement
Shinmera

Kanjidamage Crawler

Dec 2nd, 2013
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lisp 1.49 KB | None | 0 0
  (defparameter *base-page*
    "http://kanjidamage.com"
    "Scheme and host that every crawled page path is appended to.")

  (defparameter *first-page*
    "/kanji/1"
    "Path, relative to *BASE-PAGE*, that CRAWL starts from by default.")
  3.  
  (defun crawl (&key (page *first-page*) limit)
    "Walk the site page by page, returning a list with one PROCESS-PAGE result
  per visited page, in visiting order.

  PAGE is the path (appended to *BASE-PAGE*) of the first page to fetch.
  LIMIT, when non-NIL, caps the number of pages visited.

  The walk ends when LIMIT is reached or when the current page yields no
  next-page link."
    (let ((records '()))
      (do ((visited 0 (1+ visited)))
          ;; Stop when there is no page left, or the optional cap is hit.
          ((or (null page)
               (and limit (>= visited limit)))
           (nreverse records))
        (format T "Processing page: ~a~%" page)
        ;; Fetch the page and make it the document that subsequent `$`
        ;; queries (including those inside PROCESS-PAGE) operate on.
        (let ((url (format NIL "~a~a" *base-page* page)))
          ($ (initialize (drakma:http-request url :external-format-in :utf-8) :type :HTML)))
        ;; Read the next-page link before processing, so PAGE already holds
        ;; the following path (or NIL) when the loop test runs again.
        (setf page ($ ".navigation-header .text-righted a" (attr :href) (node)))
        (push (process-page) records))))
  11.  
  (defun process-page ()
    "Build a property list describing the kanji on the currently loaded page.

  Reads from whatever document the last `$` INITIALIZE call loaded (see CRAWL).
  The returned plist has the keys :NUMBER, :CHARACTER, :COMPOSITION, :NAME,
  :ONYOMI and :KUNYOMI.  :ONYOMI and :KUNYOMI are lists with one entry per
  table row, each entry holding that row's trimmed cell texts, or NIL when the
  page has no heading with that title."
    (let* (;; Characters stripped from both ends of every table cell:
           ;; space, newline, the two star glyphs and the double quote.
           (cell-junk (format NIL " ~%★☆\""))
           (page-body ($ "body>.container" (eq 1)))
           (head-row ($ page-body ".row" (eq 1)))
           (definition-tables ($ page-body "table.definition"))
           (section-titles ($ page-body ".span12 h2" (text))))
      (labels ((trimmed-cells (table-row)
                 ;; Texts of all <td> cells in TABLE-ROW, each trimmed.
                 ($ table-row "td" (text)
                    (each #'(lambda (cell) (string-trim cell-junk cell)) :replace T)))
               (readings-for (title)
                 ;; The table is chosen by position: the index of the heading
                 ;; equal to TITLE is used as the index into the tables.
                 (let ((index (position title section-titles :test #'string=)))
                   (if (null index)
                       NIL
                       (mapcar #'trimmed-cells
                               ($ (inline (nth index definition-tables)) "tr"))))))
        (let ((number-text ($ page-body ".navigation-header .text-centered" (text) (node))))
          (list :number (parse-integer (string-trim (format NIL " ~%Number") number-text)
                                       :junk-allowed T)
                :character ($ head-row ".kanji_character" (text) (node))
                :composition ($ head-row ".span8 a" (text))
                :name ($ head-row ".translation" (text) (node))
                :onyomi (readings-for "Onyomi")
                :kunyomi (readings-for "Kunyomi"))))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement