Advertisement
Guest User

Untitled

a guest
Aug 13th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.96 KB | None | 0 0
  def makecost(obs, prob, falsepos_cost, falseneg_cost):
      """Build a function giving the total misclassification cost at a cutoff.

      A sample is predicted positive when its probability is strictly greater
      than the cutoff.

      Args:
          obs: Zero-one vector of true labels.
          prob: Vector of positive-class probabilities (e.g. one column of a
              ``predict_proba`` result), aligned element-wise with ``obs``.
          falsepos_cost: Cost charged for each false positive.
          falseneg_cost: Cost charged for each false negative.

      Returns:
          An ``np.vectorize``-wrapped callable that takes a scalar cutoff or an
          array of cutoffs and returns the summed cost for each cutoff.
      """
      # Coerce once so plain Python sequences work as well as ndarrays.
      obs = np.asarray(obs)
      prob = np.asarray(prob)
      # Indicator of true negatives; independent of the cutoff, so hoisted.
      negatives = 1 - obs

      def cost(cutoff):
          pred = prob > cutoff
          fpos = pred * negatives      # predicted 1, truth 0
          fneg = (1 - pred) * obs      # predicted 0, truth 1
          return np.sum(fpos * falsepos_cost + fneg * falseneg_cost)

      return np.vectorize(cost)
  10.  
  # Estimate the expected misclassification cost per data point as a function
  # of the classification cutoff, averaged over repeated shuffled 5-fold
  # cross-validation, and pick the cutoff with the lowest average cost.
  from sklearn.model_selection import KFold, cross_val_predict

  cut = np.linspace(0, 1, 100)   # candidate cutoffs
  cost = np.zeros_like(cut)      # running sum, later the mean, of the cost curves
  obs = np.ravel(y)
  n_samples = X.shape[0]

  K = 20  # number of cross-validation repetitions
  for j in range(K):
      # No random_state: each repetition draws a fresh shuffle of the folds.
      folds = KFold(n_splits=5, shuffle=True)
      prob = cross_val_predict(
          logreg, X, obs, cv=folds, method='predict_proba', n_jobs=5
      )[:, 1]
      getcost = makecost(obs, prob, falsepos_cost=20, falseneg_cost=25)
      currentcost = getcost(cut) / n_samples
      cost += currentcost
      # Faint line for each individual repetition.
      plt.plot(cut, currentcost, c='C0', alpha=0.05)
  cost /= K

  # Solid line for the average over all repetitions.
  plt.plot(cut, cost, c='C0')
  plt.xlabel('cutoff')
  plt.ylabel('Expected cost per data point')

  bestcut = cut[np.argmin(cost)]
  bestcut
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement