Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (ns clj-playground.enzyme-selection
- (:require [clojure.data.csv :as csv]
- [clojure.java.io :as io]))
(defn read-enzyme-data
  "Read clone/analysis/enzyme records from the CSV file `filename`.

  The first row is treated as the header; each of its cells becomes a
  keyword column name. For every following row, the `:enzyme` value is
  added to a set shared by all rows with the same (`:clone_name`,
  `:analysis`) pair.

  Returns a seq of those enzyme sets (one per distinct pair), or nil when
  the file contains no data rows."
  [filename]
  (with-open [in-file (io/reader filename)]
    (let [[header & rows] (csv/read-csv in-file)
          colnames (map keyword header)
          ;; `reduce` is eager, so the lazy CSV seq is fully consumed here,
          ;; before `with-open` closes the reader.
          grouped (reduce (fn [accum row]
                            (let [r (zipmap colnames row)
                                  ;; Composite vector key: unlike a
                                  ;; \".\"-joined string it cannot collide
                                  ;; when a name itself contains a dot.
                                  k [(:clone_name r) (:analysis r)]]
                              (update-in accum [k] (fnil conj #{}) (:enzyme r))))
                          {}
                          rows)]
      (vals grouped))))
(defn min-enzyme-set
  "Greedily pick a small set of enzymes containing at least one enzyme
  from every non-empty collection in `enzyme-sets`.

  On each round the enzyme that occurs in the most still-uncovered
  collections is chosen, and every collection containing it is dropped.
  This is a greedy heuristic: the result covers every collection but is
  not guaranteed to be the smallest possible cover.

  Arguments:
    enzyme-sets - seq of enzyme collections (normally sets). Empty
                  collections cannot be covered by any enzyme and are
                  ignored.
    accum       - (optional) set of enzymes already chosen; the newly
                  selected enzymes are added to it. Defaults to #{}.

  Returns the set of chosen enzymes (`accum` itself when there is nothing
  to cover)."
  ([enzyme-sets]
   (min-enzyme-set enzyme-sets #{}))
  ([enzyme-sets accum]
   ;; Normalise once: coercing to sets makes `contains?` a membership test
   ;; for any input collection, and dropping empties guarantees that every
   ;; remaining collection is eventually removed, so the loop terminates.
   (loop [remaining (->> enzyme-sets (map set) (remove empty?))
          chosen accum]
     (if (seq remaining)
       (let [enzyme-freqs (frequencies (apply concat remaining))
             ;; On equal counts the later key wins (same as the original
             ;; comparison), so results stay stable for existing callers.
             most-popular-enzyme (reduce (fn [x y]
                                           (if (> (enzyme-freqs x) (enzyme-freqs y)) x y))
                                         (keys enzyme-freqs))]
         (recur (remove #(contains? % most-popular-enzyme) remaining)
                (conj chosen most-popular-enzyme)))
       chosen))))
Add Comment
Please, Sign In to add comment