Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 15 13:39:19 2019
@author: GoodManMcGee

Train a random forest on the pancreatic-cancer smokers data set and print,
for every test instance, the feature contributions computed by
treeinterpreter, ranked by absolute size.
"""
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image
from sklearn import preprocessing
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz
from treeinterpreter import treeinterpreter as ti

# Name of the label column in the CSV (1 = case, 0 = control).
TARGET_COLUMN = 'case (1: case, 0: control)'

# Load the data and split the label column off from the features.
data = pd.read_csv("pancreatic_cancer_smokers.csv")
target = data[TARGET_COLUMN]
data.drop(TARGET_COLUMN, axis=1, inplace=True)

# Hold out 20% of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

clf = RandomForestClassifier(n_estimators=100)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
clf_accuracy = accuracy_score(y_test, y_pred)

# Decompose each test prediction into a bias term plus per-feature
# contributions.
clf_pred, clf_bias, contributions = ti.predict(clf, x_test)

# The reporting loop below is adapted from DataDive's treeinterpreter
# tutorial.
#///////////////////////////////////////////
# A pandas DataFrame has no ``feature_names`` attribute (that belongs to the
# sklearn demo datasets used in the tutorial); the feature names are the
# column labels of the frame.
feature_names = list(x_test.columns)

# For a classifier the contributions carry one value per class for every
# feature, so each entry is an array rather than a scalar and can be neither
# sorted with abs() nor passed to round(). Select a single class column
# first: the "case" class (label 1) when present, otherwise the last class.
case_contributions = np.asarray(contributions)
if case_contributions.ndim == 3:
    class_labels = list(clf.classes_)
    case_index = class_labels.index(1) if 1 in class_labels else len(class_labels) - 1
    case_contributions = case_contributions[:, :, case_index]

for i, instance_contributions in enumerate(case_contributions):
    print("Instance", i)
    print("Bias (trainset mean)", clf_bias[i])
    print("Feature contributions:")
    # Largest absolute contribution first.
    ranked = sorted(
        zip(instance_contributions, feature_names),
        key=lambda pair: -abs(pair[0]),
    )
    for c, feature in ranked:
        print(feature, round(float(c), 2))
    print("-" * 20)
#///////////////////////////////////////////
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement