Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
65
0
Never
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
text 1.35 KB | None | 0 0
  # Tune a decision-tree classifier with a randomized hyper-parameter search,
  # then report the search results, feature importances and accuracy.
  #
  # This snippet relies on names defined outside it: DecisionTreeClassifier,
  # RandomizedSearchCV, np, kfold, X_train, Y_train, X_test and Y_test.
  seed = 7
  DTC = DecisionTreeClassifier

  # Candidate values sampled by the search.
  # NOTE: options left out by the original author: 'max_features':
  # range(10, 100) in the grid, and min_samples_leaf=10.
  parameters = {
      'max_depth': range(3, 10),
      'max_leaf_nodes': range(10, 30),
      'criterion': ['gini'],
      'splitter': ['best'],
  }

  # The seed is given to the search as well as to the tree; without it the
  # sampled parameter candidates differ from run to run.
  dt = RandomizedSearchCV(DTC(random_state=seed), parameters, n_jobs=10,
                          cv=kfold, random_state=seed)
  fit_dt = dt.fit(X_train, Y_train)
  print(dir(fit_dt))

  tree_model = dt.best_estimator_
  print(dt.best_score_, dt.best_params_, dt.error_score)
  print('best estimators')
  print(tree_model)

  features = tree_model.feature_importances_
  print(features)

  # Column indices ordered from most to least important.
  rank = np.argsort(features)[::-1]
  print(rank[:12])
  # Pair every importance with its column index so the sorted listing still
  # identifies which feature each value belongs to.
  print(sorted((importance, index)
               for index, importance in enumerate(features)))

  # Print the mean score (+/- two standard deviations) of every candidate.
  means = dt.cv_results_['mean_test_score']
  stds = dt.cv_results_['std_test_score']
  for mean, std, params in zip(means, stds, dt.cv_results_['params']):
      print("%0.3f (+/-%0.03f) for %r"
            % (mean, std * 2, params))

  print('Best Score: {}'
        .format(dt.best_score_))
  print('Best params: {}'
        .format(dt.best_params_))

  print('Accuracy of DT classifier on training set: {:.2f}'
        .format(dt.score(X_train, Y_train)))
  print('Accuracy of DT classifier on test set: {:.2f}'
        .format(dt.score(X_test, Y_test)))

  # Show the true labels next to the predicted ones. The predictions are
  # printed as returned by the classifier; rounding them is unnecessary and
  # would break on non-numeric labels.
  predictions = dt.predict(X_test)
  print(np.column_stack((Y_test, predictions)))
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement