Advertisement
Guest User

Untitled

a guest
Jun 26th, 2019
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.86 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Apr 15 13:39:19 2019
  4.  
  5. @author: GoodManMcGee
  6. """
  7.  
  8. import pandas as pd
  9. from sklearn.metrics import accuracy_score
  10. from sklearn import tree
  11. from sklearn.model_selection import train_test_split
  12. from sklearn import preprocessing
  13. from sklearn.metrics import confusion_matrix
  14. from sklearn.ensemble import RandomForestClassifier
  15. from IPython.display import Image
  16. from sklearn.tree import export_graphviz
  17. from treeinterpreter import treeinterpreter as ti
  18. import matplotlib.pyplot as plt
  19. import numpy as np
  20. import itertools
  21.  
  22. data = pd.read_csv("pancreatic_cancer_smokers.csv")
  23. target = data['case (1: case, 0: control)']
  24. data.drop('case (1: case, 0: control)', axis=1, inplace=True)
  25. x_train, x_test, y_train, y_test = train_test_split(data, target, test_size = 0.2)
  26. clf = RandomForestClassifier(n_estimators=100)
  27. clf.fit(x_train, y_train)
  28. y_pred = clf.predict(x_test)
  29. clf_accuracy = accuracy_score(y_test, y_pred)
  30. clf_pred, clf_bias, contributions = ti.predict(clf, x_test)
  31.  
  32.  
  33. #The code below was taken from DataDive's treeinterpreter tutorial.
  34. #The aforementioned messages applies to all code between the underscores
  35. #///////////////////////////////////////////
  36.  
  37. for i in range(len(x_test)):
  38. print ("Instance", i)
  39. print ("Bias (trainset mean)", clf_bias[i])
  40. print ("Feature contributions:")
  41. for c, feature in sorted(zip(contributions[i], data.feature_names),
  42. key=lambda x: -abs(x[0])):
  43. #An error occurs in the "data.feature_names" method in the code above:AttributeError: 'DataFrame' object has no attribute 'feature_names'. I have tried referenceing columns from datasets also, but that also leads to errors: ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
  44. print (feature, round(c, 2))
  45. print ("-"*20)
  46. #///////////////////////////////////////////
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement