Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
(defparameter *base-page* "http://kanjidamage.com"
  "Site root; CRAWL prepends it to each page path to form the request URL.")

(defparameter *first-page* "/kanji/1"
  "Page path CRAWL starts from when no :PAGE argument is supplied.")
(defun crawl (&key (page *first-page*) limit)
  "Fetch pages one after another, starting at PAGE, and return a list
holding one PROCESS-PAGE result per fetched page, in visiting order.

PAGE  - path appended to *BASE-PAGE*; defaults to *FIRST-PAGE*.
LIMIT - when non-NIL, the maximum number of pages to fetch.

Crawling stops once LIMIT pages were fetched or the current page yields
no next-page link."
  (do ((fetched 0 (1+ fetched))
       (results '()))
      ;; Stop when there is no page left or the optional limit is reached.
      ((or (null page)
           (and limit (not (< fetched limit))))
       (nreverse results))
    (format T "Processing page: ~a~%" page)
    ;; Download the page and make it the document the later $ queries read.
    (let ((url (format NIL "~a~a" *base-page* page)))
      ($ (initialize (drakma:http-request url :external-format-in :utf-8) :type :HTML)))
    ;; Pick up the next-page link before extracting, as both read the
    ;; document that was just loaded.
    (setf page ($ ".navigation-header .text-righted a" (attr :href) (node)))
    (push (process-page) results)))
(defun process-page ()
  "Extract the kanji data from the document currently loaded into lQuery.

Returns a property list with the keys :NUMBER, :CHARACTER, :COMPOSITION,
:NAME, :ONYOMI and :KUNYOMI.  :ONYOMI and :KUNYOMI are lists with one
entry per table row (the trimmed texts of the row's cells), or NIL when
the page has no heading with that name."
  (let* ((container ($ "body>.container" (eq 1)))
         (header ($ container ".row" (eq 1)))
         (tables ($ container "table.definition"))
         (names ($ container ".span12 h2" (text)))
         ;; Characters stripped from both ends of every table cell text.
         (cell-trim-bag (format NIL " ~%★☆\"")))
    (flet ((table-rows (heading)
             ;; The table is chosen by the position of HEADING among the
             ;; h2 texts; assumes headings and definition tables line up
             ;; one-to-one on the page -- TODO confirm.
             (let ((index (position heading names :test #'string=)))
               (and index
                    (mapcar #'(lambda (table-row)
                                ($ table-row "td" (text)
                                   (each #'(lambda (cell)
                                             (string-trim cell-trim-bag cell))
                                         :replace T)))
                            ($ (inline (nth index tables)) "tr"))))))
      ;; Trim whitespace and the letters of "Number" off the centered
      ;; navigation text, then parse whatever integer remains.
      (let ((number-text (string-trim (format NIL " ~%Number")
                                      ($ container ".navigation-header .text-centered" (text) (node)))))
        (list :number (parse-integer number-text :junk-allowed T)
              :character ($ header ".kanji_character" (text) (node))
              :composition ($ header ".span8 a" (text))
              :name ($ header ".translation" (text) (node))
              :onyomi (table-rows "Onyomi")
              :kunyomi (table-rows "Kunyomi"))))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement