Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.69 KB | None | 0 0
  1. import itertools as itt
  2. from typing import List
  3. import numpy as np
  4. import pandas as pd
  5. import sklearn.metrics as metrics
  6. from sklearn.model_selection import StratifiedKFold
  7. import sys
  8.  
  9.  
  10. def score_model(
  11. X, y, model, n_splits=3, metric="auc", output=sys.stdout, echo=False
  12. ) -> float:
  13. """score_model
  14. score model will take in a feature feature data, target data, and a model to
  15. cross validate accross n_splits of StratifiedKFolds. Returns the average
  16. metric specified for the test set the default is "auc"
  17.  
  18. :param X: the feature data
  19. :param y: the target data
  20. :param model: the model to fit the data to
  21. :param n_splits: the number of k fold splits
  22. :param metric: metric to return "auc"
  23. :param output:
  24. :param echo:
  25. :rtype: float
  26. """
  27. result = {"auc": [], "accuracy": [], "f1": []}
  28. sk = StratifiedKFold(n_splits=n_splits)
  29. k = 0
  30. for train_index, test_index in sk.split(X, y):
  31. # K fold split
  32. X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  33. y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  34.  
  35. if echo or output == sys.stdout:
  36. print("training...", end=" ", file=sys.stdout)
  37. model.fit(X_train, y_train)
  38. print("done", file=sys.stdout)
  39.  
  40. print("===== K FOLD %d =====" % k, file=output)
  41. train_preds = model.predict(X_train)
  42. test_preds = model.predict(X_test)
  43.  
  44. print("ACCURACY", file=output)
  45. print(train_preds)
  46. train_accurracy = metrics.accuracy_score(
  47. y_true=y_train, y_pred=train_preds.round()
  48. )
  49. test_accurracy = metrics.accuracy_score(
  50. y_true=y_test, y_pred=test_preds.round()
  51. )
  52. print("\ttrain: %f" % train_accurracy, file=output)
  53. print("\ttest: %f" % test_accurracy, file=output)
  54. result["accuracy"].append(test_accurracy)
  55.  
  56. print("AUC", file=output)
  57. train_fpr, train_tpr, _ = metrics.roc_curve(y_train, train_preds)
  58. train_auc = metrics.auc(train_fpr, train_tpr)
  59. print("\ttrain: %f" % train_auc, file=output)
  60. test_fpr, test_tpr, _ = metrics.roc_curve(y_test, test_preds)
  61. test_auc = metrics.auc(test_fpr, test_tpr)
  62. print("\ttest: %f" % test_auc, file=output)
  63. result["auc"].append(test_auc)
  64.  
  65. print("F1", file=output)
  66. train_f1 = metrics.f1_score(y_train, train_preds.round())
  67. print("\ttrain: %f" % train_f1, file=output)
  68. test_f1 = metrics.f1_score(y_test, test_preds.round())
  69. print("\ttest: %f" % test_f1, file=output)
  70. result["f1"].append(test_f1)
  71.  
  72. k += 1
  73. return sum(result[metric]) / len(result[metric])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement