Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import itertools as itt
import sys
from typing import List

import numpy as np
import pandas as pd
import sklearn.metrics as metrics
from sklearn.model_selection import StratifiedKFold
def score_model(
    X, y, model, n_splits=3, metric="auc", output=sys.stdout, echo=False
) -> float:
    """Cross-validate ``model`` on ``(X, y)`` and return the mean test metric.

    Runs a StratifiedKFold loop: for each fold the model is fit on the
    training split, then accuracy, ROC-AUC, and F1 are computed on both
    splits and written as a per-fold report to ``output``.  The average of
    the requested ``metric`` over the *test* folds is returned.

    :param X: feature data (indexed with ``.iloc`` — e.g. a pandas DataFrame)
    :param y: target data (indexed with ``.iloc`` — e.g. a pandas Series;
        binary labels, as required by ``roc_curve`` / ``f1_score``)
    :param model: estimator exposing ``fit(X, y)`` and ``predict(X)``
    :param n_splits: number of stratified K-fold splits
    :param metric: which test-set metric to average: "auc", "accuracy", or "f1"
    :param output: file-like stream that receives the per-fold report
    :param echo: when True, also print fit progress to stdout even if
        ``output`` is a different stream
    :raises KeyError: if ``metric`` is not "auc", "accuracy", or "f1"
    :rtype: float
    """
    result = {"auc": [], "accuracy": [], "f1": []}
    # Fail fast on a bad metric name instead of raising only after every
    # fold has already been fit (same KeyError the late lookup would raise).
    if metric not in result:
        raise KeyError(
            "metric must be one of %s, got %r" % (sorted(result), metric)
        )
    sk = StratifiedKFold(n_splits=n_splits)
    for k, (train_index, test_index) in enumerate(sk.split(X, y)):
        # K fold split
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
        # Progress messages go to stdout only when requested (echo) or when
        # the report itself is already on stdout; guard both halves so a
        # lone "done" never appears without its "training..." prefix.
        verbose = echo or output == sys.stdout
        if verbose:
            print("training...", end=" ", file=sys.stdout)
        model.fit(X_train, y_train)
        if verbose:
            print("done", file=sys.stdout)
        print("===== K FOLD %d =====" % k, file=output)
        train_preds = model.predict(X_train)
        test_preds = model.predict(X_test)
        print("ACCURACY", file=output)
        # .round() turns probability-like outputs into hard 0/1 labels for
        # the threshold metrics (accuracy, F1).
        train_accuracy = metrics.accuracy_score(
            y_true=y_train, y_pred=train_preds.round()
        )
        test_accuracy = metrics.accuracy_score(
            y_true=y_test, y_pred=test_preds.round()
        )
        print("\ttrain: %f" % train_accuracy, file=output)
        print("\ttest: %f" % test_accuracy, file=output)
        result["accuracy"].append(test_accuracy)
        print("AUC", file=output)
        # AUC is computed from the raw (possibly continuous) predictions,
        # not the rounded labels, so score-valued models rank properly.
        train_fpr, train_tpr, _ = metrics.roc_curve(y_train, train_preds)
        train_auc = metrics.auc(train_fpr, train_tpr)
        print("\ttrain: %f" % train_auc, file=output)
        test_fpr, test_tpr, _ = metrics.roc_curve(y_test, test_preds)
        test_auc = metrics.auc(test_fpr, test_tpr)
        print("\ttest: %f" % test_auc, file=output)
        result["auc"].append(test_auc)
        print("F1", file=output)
        train_f1 = metrics.f1_score(y_train, train_preds.round())
        print("\ttrain: %f" % train_f1, file=output)
        test_f1 = metrics.f1_score(y_test, test_preds.round())
        print("\ttest: %f" % test_f1, file=output)
        result["f1"].append(test_f1)
    # Mean of the requested metric across the test folds.
    return sum(result[metric]) / len(result[metric])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement