Advertisement
Guest User

Untitled

a guest
Oct 17th, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.86 KB | None | 0 0
  1. import sys
  2. import pickle
  3. sys.path.append("../tools/")
  4. from feature_format import featureFormat, targetFeatureSplit
  5. from tester import dump_classifier_and_data
  6. from sklearn.preprocessing import MinMaxScaler, StandardScaler
  7. from sklearn.feature_selection import SelectKBest, f_classif
  8. from sklearn.tree import DecisionTreeClassifier
  9. from sklearn.naive_bayes import GaussianNB
  10. from sklearn.pipeline import Pipeline
  11. from sklearn.cross_validation import train_test_split,StratifiedShuffleSplit
  12. from sklearn.grid_search import GridSearchCV
  13. from sklearn.neighbors import KNeighborsClassifier
  14. from sklearn.decomposition import PCA
  15. from sklearn.svm import SVC
  16. from sklearn.metrics import accuracy_score
  17.  
  18.  
  19. ### Load the dictionary containing the dataset
  20. with open("final_project_dataset.pkl", "r") as data_file:
  21. data_dict = pickle.load(data_file)
  22.  
  23.  
  24. data_dict.pop("TOTAL",0)
  25. data_dict.pop("BANNANTINE JAMES M", 0)
  26. data_dict.pop("GRAY RODNEY", 0)
  27.  
  28.  
  29. def test_classifier(clf, dataset, features_list, folds = 1000):
  30. data = featureFormat(dataset, features_list, sort_keys = True)
  31. labels, features = targetFeatureSplit(data)
  32. cv = StratifiedShuffleSplit(labels, folds, random_state = 42)
  33. true_negatives = 0
  34. false_negatives = 0
  35. true_positives = 0
  36. false_positives = 0
  37. for train_idx, test_idx in cv:
  38. features_train = []
  39. features_test = []
  40. labels_train = []
  41. labels_test = []
  42. for ii in train_idx:
  43. features_train.append( features[ii] )
  44. labels_train.append( labels[ii] )
  45. for jj in test_idx:
  46. features_test.append( features[jj] )
  47. labels_test.append( labels[jj] )
  48.  
  49. ### fit the classifier using training set, and test on test set
  50. clf.fit(features_train, labels_train)
  51. predictions = clf.predict(features_test)
  52. for prediction, truth in zip(predictions, labels_test):
  53. if prediction == 0 and truth == 0:
  54. true_negatives += 1
  55. elif prediction == 0 and truth == 1:
  56. false_negatives += 1
  57. elif prediction == 1 and truth == 0:
  58. false_positives += 1
  59. elif prediction == 1 and truth == 1:
  60. true_positives += 1
  61. else:
  62. print "Warning: Found a predicted label not == 0 or 1."
  63. print "All predictions should take value 0 or 1."
  64. print "Evaluating performance for processed predictions:"
  65. break
  66. try:
  67. total_predictions = true_negatives + false_negatives + false_positives + true_positives
  68. accuracy = 1.0*(true_positives + true_negatives)/total_predictions
  69. precision = 1.0*true_positives/(true_positives+false_positives)
  70. recall = 1.0*true_positives/(true_positives+false_negatives)
  71. f1 = 2.0 * true_positives/(2*true_positives + false_positives+false_negatives)
  72. f2 = (1+2.0*2.0) * precision*recall/(4*precision + recall)
  73. print clf
  74. print PERF_FORMAT_STRING.format(accuracy, precision, recall, f1, f2, display_precision = 5)
  75. print RESULTS_FORMAT_STRING.format(total_predictions, true_positives, false_positives, false_negatives, true_negatives)
  76. print ""
  77. except:
  78. print "Got a divide by zero when trying out:", clf
  79. print "Precision or recall may be undefined due to a lack of true positive predicitons."
  80.  
  81.  
  82.  
  83. features_list = ['poi',
  84. 'salary',
  85. 'exercised_stock_options',
  86. 'bonus',
  87. 'total_stock_value',
  88. 'deferred_income'
  89. ]
  90.  
  91. my_dataset = data_dict
  92. data = featureFormat(my_dataset, features_list, sort_keys = True)
  93. labels, features = targetFeatureSplit(data)
  94.  
  95. clf = GaussianNB()
  96.  
  97. clf.fit(features_train, labels_train)
  98. test_classifier(clf, my_dataset, features_list)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement