;; Ross Thomas
;; By: a guest | Nov 13th, 2009 | Syntax: Lisp | Size: 1.01 KB | Hits: 319 | Expires: Never
(use 'clojure.contrib.duck-streams)
;; Count instances of strings found in a file, parallelized using agents.
;;
;; See http://www.tbray.org/ongoing/When/200x/2009/11/11/Clojure-References
;;
;; - Ross Thomas <halfacanuck@gmail.com>
;; Path of the text file whose lines are scanned.
(def file-name "/etc/dictionaries-common/words")
;; Pattern applied to each line: skips the first two characters and captures
;; the next three; the captured group is the key that gets counted.
(def re #"^..(...)")
;; How many of the highest-count entries are printed at the end.
(def top-n 10)
;; Number of agents created to share the counting work.
(def num-agents 100)
;; Build work-seq: a lazy sequence of [line agent] pairs, with the agents
;; handed out round-robin across the lines of the input file.
(def lines (read-lines file-name))
;; num-agents agents, each starting from an empty map.
(def agent-pool (take num-agents (repeatedly (fn [] (agent {})))))
;; Infinite repetition of the pool; map below stops when the lines run out.
(def agents (cycle agent-pool))
(def work-seq (map (fn [text worker] [text worker]) lines agents))
;; Agent action: tally one line into the agent's running count map.
;;   so-far - map of captured substring -> number of times seen so far
;;   line   - one line of input text
;; Returns so-far with the count for the capture group of `re` increased by
;; one (starting at 1 for a new key), or so-far unchanged when `re` does not
;; match the line.
(defn do-line [so-far line]
  (let [[_ hit :as found] (re-find re line)]
    (if found
      (merge-with + so-far {hit 1})
      so-far)))
;; Dispatch every line to its assigned agent, then block until all agents
;; have processed everything sent to them.
(doseq [[text worker] work-seq]
  (send worker do-line text))
(apply await agent-pool)
;; Combine the per-agent tallies into one map, summing counts for shared keys.
(def result (apply merge-with + (map deref agent-pool)))
;; Print the top-n entries, highest count first.
;; NOTE(review): if this is run as a standalone script, the agent thread pool
;; may keep the JVM alive after printing; (shutdown-agents) is deliberately
;; not called here because it would break agent use in a live REPL — confirm
;; how this file is meant to be run.
(let [ascending (sort-by val result)
      descending (reverse ascending)]
  (println (take top-n descending)))