Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Randomized hyperparameter search for a decision tree classifier,
# followed by a per-candidate score report and train/test evaluation.
# NOTE(review): `kfold`, the train/test arrays, and the sklearn/numpy
# imports are defined elsewhere in the file.
seed = 7
DTC = DecisionTreeClassifier

# Candidate hyperparameters sampled by RandomizedSearchCV.
parameters = {
    'max_depth': range(3, 10),
    'max_leaf_nodes': range(10, 30),
    'criterion': ['gini'],
    "splitter": ["best"],
    # 'max_features': range(10, 100),  # disabled in the original run
}

dt = RandomizedSearchCV(DTC(random_state=seed), parameters, n_jobs=10, cv=kfold)  # min_samples_leaf=10
fit_dt = dt.fit(X_train, Y_train)

tree_model = dt.best_estimator_
print(dt.best_score_, dt.best_params_, dt.error_score)  # , dt.cv_results_
print('best estimators')
print(fit_dt.best_estimator_)

# Rank features by importance, highest first.
features = tree_model.feature_importances_
print(features)
rank = np.argsort(features)[::-1]
print(rank[:12])
# BUG FIX: the original `sorted(list(zip(features)))` zipped a single
# iterable, yielding meaningless 1-tuples; pair each importance with its
# feature index and sort descending instead.
print(sorted(zip(features, range(len(features))), reverse=True))

# Print mean/std test score for every sampled parameter combination.
means = dt.cv_results_['mean_test_score']
stds = dt.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, dt.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))

print('Best Score: {}'.format(dt.best_score_))
print('Best params: {}'.format(dt.best_params_))
print('Accuracy of DT classifier on training set: {:.2f}'.format(dt.score(X_train, Y_train)))
print('Accuracy of DT classifier on test set: {:.2f}'.format(dt.score(X_test, Y_test)))

predictions = dt.predict(X_test)
print(np.column_stack((Y_test, np.round(predictions))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement