Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- (ns numerai.exploit
- (:require [clojure-csv.core :as csv])
- (:require [clustering.core.k-means :as k-means]))
- ;;
- ;; Read in data, define output directories
- ;;
- (def evol-dir (str "results/week" 99 "/"))
- (def outp-dir (str "results/week" 99 "/"))
- (def tournament (rest (csv/parse-csv (slurp "data/numerai_tournament_data.csv"))))
- (def predictions (into {} (rest (csv/parse-csv (slurp "data/example_predictions.csv")))))
- (def ids (map first tournament))
- ;;
- ;; Helper Functions
- ;;
- (defn logloss [coll]
- (let [scores (map (fn [[g s]] (if (= "1" g) s (- 1.0 s))) coll)]
- (float (/ (reduce + (map #(Math/log %) scores)) (* -1 (count scores))))))
- ;;
- ;; How big is live
- ;;
- (count (filter #(= (nth % 2) "live") tournament))
- ;;
- ;; live is 4386 entries.
- ;;
- ;; What happens if we randomly predict all live scores to be wrong, except the final one.
- ;;
- (logloss (map vector (repeat 4386 "1") (conj (repeat 4385 0.49999) 0.9)))
- ;;
- ;; We still pass logloss. So if we submit two predictions, one with the final live score as "0.9" and one with "0.1", we can't lose.
- ;; We use the default predictions for all but the last tournament entry, but put the prediction limits to 0.49999 and 0.50001.
- (def goals (map #(if (= (nth % 2) "live")
- (min 0.50001 (max 0.49999 (read-string (predictions (first %)))))
- (min 0.50001 (max 0.49999 (read-string (predictions (first %))))))
- tournament))
- (spit (str outp-dir "JINX-pos" ".csv")
- (csv/write-csv (concat (list ["id" "probability"])
- (map #(list (str %1) (str %2))
- ids
- (concat (butlast goals) [0.9])))))
- (spit (str outp-dir "JINX-neg" ".csv")
- (csv/write-csv (concat (list ["id" "probability"])
- (map #(list (str %1) (str %2))
- ids
- (concat (butlast goals) [0.1])))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement