Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# obs is a zero-one vector of truth
# prob is a vector resulting from "predict_proba"
def makecost(obs, prob, falsepos_cost, falseneg_cost):
    """Build a function that maps cutoff(s) to total misclassification cost.

    A sample is predicted positive when ``prob > cutoff``.  A predicted
    positive contributes ``(1 - obs) * falsepos_cost`` and a predicted
    negative contributes ``obs * falseneg_cost``; the returned function sums
    these contributions over all samples.

    Parameters
    ----------
    obs : array-like
        Observed labels (0 or 1), one per sample.  Flattened to 1-D so that
        a column vector cannot silently broadcast against ``prob``.
    prob : array-like
        Predicted scores, one per sample, compared against the cutoff.
        Flattened to 1-D.
    falsepos_cost : scalar
        Cost charged for each false positive.
    falseneg_cost : scalar
        Cost charged for each false negative.

    Returns
    -------
    callable
        ``cost(cutoff)`` accepting a scalar or array-like of cutoffs and
        returning an ndarray of total costs with the same shape as the
        input (a 0-d array for a scalar, an empty array for empty input).

    Raises
    ------
    ValueError
        If ``obs`` and ``prob`` do not contain the same number of samples.
    """
    obs = np.ravel(obs)
    prob = np.ravel(prob)
    if obs.shape != prob.shape:
        raise ValueError(
            "obs and prob must have the same number of samples, got "
            f"{obs.size} and {prob.size}"
        )

    # Per-sample penalties do not depend on the cutoff, so compute them once
    # here instead of on every evaluation.
    flagged_penalty = (1 - obs) * falsepos_cost
    missed_penalty = obs * falseneg_cost

    def cost_at(cutoff):
        # Each sample pays exactly one penalty, chosen by its prediction.
        predicted_positive = prob > cutoff
        return np.sum(
            np.where(predicted_positive, flagged_penalty, missed_penalty)
        )

    def cost(cutoff):
        cutoffs = np.asarray(cutoff)
        # An explicit loop (rather than np.vectorize) copes with an empty
        # cutoff array and evaluates each cutoff exactly once.
        totals = [cost_at(c) for c in cutoffs.flat]
        return np.array(totals).reshape(cutoffs.shape)

    return cost
from sklearn.model_selection import KFold, cross_val_predict

# Grid of candidate cutoffs and the accumulator for their averaged cost.
cut = np.linspace(0, 1, 100)
cost = np.zeros_like(cut)

obs = np.ravel(y)
K = 20  # number of repeated cross-validation runs to average over

for j in range(K):
    # No random_state is given, so every repetition shuffles into a
    # different 5-fold split.
    folds = KFold(n_splits=5, shuffle=True)
    # Out-of-fold predicted probabilities; column 1 is kept
    # (presumably the positive class -- confirm against logreg.classes_).
    prob = cross_val_predict(
        logreg, X, obs, cv=folds, method='predict_proba', n_jobs=5
    )[:, 1]
    getcost = makecost(obs, prob, falsepos_cost=20, falseneg_cost=25)
    # Normalise the total cost by the number of rows in X.
    currentcost = getcost(cut) / X.shape[0]
    cost += currentcost
    # Faint line for this individual repetition.
    plt.plot(cut, currentcost, c='C0', alpha=0.05)

# Average over the K repetitions and draw the mean curve on top.
cost /= K
plt.plot(cut, cost, c='C0')
plt.xlabel('cutoff')
plt.ylabel('Expected cost per data point');

# Cutoff on the grid with the lowest averaged cost.
bestcut = cut[np.argmin(cost)]
bestcut
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement