• Sign Up
• Login
• API
• FAQ
• Tools
• Archive
SHARE
TWEET # Untitled a guest Aug 13th, 2019 67 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# obs is a zero-one vector of truth
# prob is a vector resulting from "predict_proba"
def makecost(obs, prob, falsepos_cost, falseneg_cost):
    """Build a function that gives the total misclassification cost at a cutoff.

    Parameters
    ----------
    obs : array-like
        Zero-one vector of true labels.
    prob : array-like
        Predicted scores, one per entry of ``obs``; an entry is treated as a
        positive prediction when it is strictly greater than the cutoff.
    falsepos_cost : number
        Cost charged for each false positive.
    falseneg_cost : number
        Cost charged for each false negative.

    Returns
    -------
    numpy.vectorize
        Callable taking a scalar or array-like of cutoffs and returning the
        summed cost for each cutoff.
    """
    # Coerce once, outside the closure: plain lists/tuples do not support
    # ``1 - obs`` or ``prob > cutoff``, and this avoids redoing the
    # conversion for every cutoff evaluated.
    obs = np.asarray(obs)
    prob = np.asarray(prob)

    def cost(cutoff):
        pred = prob > cutoff
        # Predicted positive where the truth is 0.
        fpos = pred * (1 - obs)
        # Predicted negative where the truth is 1.
        fneg = (1 - pred) * obs
        return np.sum(fpos * falsepos_cost + fneg * falseneg_cost)

    # ``cost`` handles one cutoff at a time; vectorize so callers can pass a
    # whole grid of cutoffs in a single call.
    return np.vectorize(cost)
10.
# Grid of candidate cutoffs and an accumulator for the averaged cost curve.
cut = np.linspace(0, 1, 100)
cost = np.zeros_like(cut)
from sklearn.model_selection import KFold, cross_val_predict
obs = np.ravel(y)

# Number of repetitions of the shuffled 5-fold cross-validation.
K = 20
for _ in range(K):
    # A fresh shuffled split on every repetition, so each pass produces a
    # different set of out-of-fold predictions.
    folds = KFold(n_splits=5, shuffle=True)
    # Keep column 1 of predict_proba (presumably the positive class -- confirm
    # against logreg.classes_).
    prob = cross_val_predict(logreg, X, np.ravel(y), cv=folds, method='predict_proba', n_jobs=5)[:, 1]
    getcost = makecost(obs, prob, falsepos_cost=20, falseneg_cost=25)
    # Normalise by the number of samples. X.shape is a tuple, so dividing by
    # it directly does not yield a per-data-point cost.
    currentcost = getcost(cut) / X.shape[0]
    cost += currentcost
    # Faint trace for each individual repetition.
    plt.plot(cut, currentcost, c='C0', alpha=0.05)
cost /= K
# Solid line: mean cost curve over the K repetitions.
plt.plot(cut, cost, c='C0')
plt.xlabel('cutoff')
plt.ylabel('Expected cost per data point');

# Cutoff with the lowest averaged cost.
bestcut = cut[np.argmin(cost)]
bestcut
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top