Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
(ns batch-pmap-wide-finder
  "A basic map/reduce approach to the wide finder using pmap.
  Optimized for being idiomatic and readable rather than for speed.
  Lines are tallied in batches of *batch-size* rather than one at a time."
  (:use [clojure.contrib.duck-streams :only [reader]]
        [clojure.contrib.seq-utils :only [partition-all]]))
;; Number of input lines grouped into one batch before being handed to
;; a parallel worker.
(def *batch-size* 50)

;; Pattern used to pull the page identifier out of a log line: the run
;; of digits directly after "GET /" and directly before a space.
;; An alternative pattern, kept for reference:
;;(def re #"GET /\S+-(\d+)-\S+ ")
(def re #"GET /(\d+) ")
(defn tally
  "Return a single-entry map {page 1} when line matches re, where page
  is the text captured by the pattern's group; otherwise return an
  empty map."
  [line]
  (let [match (re-find re line)]
    (if match
      ;; match is [whole-match captured-group]; only the group is kept.
      {(second match) 1}
      {})))
(defn count-lines
  "Tally each line in lines and add the per-line results together into
  one map of page to hit count."
  [lines]
  (->> lines
       (map tally)
       (apply merge-with +)))
(defn find-widely
  "Return a map of pages to hit counts in filename.

  The file is read line by line, the lines are grouped into batches of
  *batch-size*, each batch is counted in parallel with pmap, and the
  per-batch maps are summed together. An input with no lines yields an
  empty map."
  [filename]
  ;; with-open closes the reader once counting is done. This is safe
  ;; because merge-with consumes every batch before the body returns.
  (with-open [rdr (reader filename)]
    (or (apply merge-with +
               (pmap count-lines
                     ;; partition-all keeps the final, shorter batch;
                     ;; plain partition would drop those trailing lines.
                     (partition-all *batch-size* (line-seq rdr))))
        ;; merge-with applied to no maps returns nil; honour the
        ;; documented contract of returning a map.
        {})))
;; Main
;; Print the hit-count map for the file named by the second entry of
;; *command-line-args* (presumably the first entry is the script
;; itself -- confirm against how this is launched).
(let [filename (second *command-line-args*)]
  (.println System/out (str (find-widely filename))))
Add Comment
Please, Sign In to add comment