Guest User

Untitled

a guest
May 25th, 2018
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  1. (ns scraper
  2. (:use [clojure.contrib.duck-streams :only [spit]]
  3. [clojure.contrib.seq-utils :only [partition-all]])
  4. (:import (java.net URL)
  5. (java.io BufferedReader InputStreamReader FileReader)))
  6.  
  7. (def num-threads 20) ; divisor `run` uses to size URL batches; bounds how many futures it starts
  8.  
  (defn scrape
    "Downloads the resource at `url` and writes its text to a new file whose
    name is a freshly generated random UUID (a relative path).

    url - absolute URL, given as a string.

    Returns the value of `spit` on success and nil on any failure. Failures
    are reported on *err* instead of being thrown, so a single bad URL cannot
    abort the rest of a batch."
    [url]
    (try
      (with-open [stream (.openStream (URL. url))]
        (let [buf (BufferedReader. (InputStreamReader. stream))]
          (spit (str (java.util.UUID/randomUUID))
                ;; line-seq strips the line terminators; put a newline back
                ;; between lines so the saved text keeps its line structure
                ;; instead of being glued into one long line.
                (apply str (interpose "\n" (line-seq buf))))))
      (catch Exception e
        ;; Best effort: report the failure and carry on.
        (binding [*out* *err*]
          (println "scrape failed for" url "-" (str e)))
        nil)))
  17.  
  (defn process-urls
    "Scrapes every URL in `list`, one after another, purely for the side
    effects of `scrape`. Returns nil."
    [list]
    ;; map is lazy; dorun walks the whole sequence in order so each scrape
    ;; call actually runs, and discards the results.
    (dorun (map scrape list)))
  21.  
  (defn run
    "Reads one URL per line from the file `f` and scrapes them concurrently.

    The URLs are split into at most `num-threads` batches, and each batch is
    handed to its own future running `process-urls`.

    f - path of the URL list file, as accepted by java.io.FileReader.

    Returns nil as soon as the futures have been started; it does not wait
    for them to finish."
    [f]
    (let [urls (with-open [rdr (BufferedReader. (FileReader. f))]
                 ;; line-seq is lazy: realise it completely before with-open
                 ;; closes the reader.
                 (doall (line-seq rdr)))
          ;; Ceiling division, so the URLs are spread over as many of the
          ;; num-threads workers as possible. Clamped to 1 because an empty
          ;; file would otherwise yield a batch size of 0.
          url-per-thread (max 1 (quot (+ (count urls) (dec num-threads))
                                      num-threads))]
      (doseq [batch (partition-all url-per-thread urls)]
        (future (process-urls batch)))))
  27.  
  28. ;;(run "url.list")
Add Comment
Please, Sign In to add comment