Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
# Compare random forests of varying size / feature-subset width against a
# shallow decision tree on scikit-learn's breast-cancer dataset, printing
# train and test accuracy for every configuration.
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the dataset once and unpack the pieces used below.
cancer = load_breast_cancer()
X, y, features, labels = (
    cancer.data,
    cancer.target,
    cancer.feature_names,
    cancer.target_names,
)
print('X.shape= ', X.shape)
print('y.shape= ', y.shape)

# Fixed random_state so the train/test partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)
print('X_train.shape= ', X_train.shape)
print('X_test.shape= ', X_test.shape)
print('y_train.shape= ', y_train.shape)
print('y_test.shape= ', y_test.shape)
print('features:', features)

# Map each integer class index to its label name.
dict_names = dict(enumerate(labels))
print('target names :', dict_names)

# Baseline forest with default hyper-parameters.
clf = RandomForestClassifier().fit(X_train, y_train)
# NOTE: the score below is not printed or stored; it is only displayed when
# this line is evaluated in an interactive session / notebook cell.
clf.score(X_train, y_train)

# Grid to sweep: number of trees x number of features tried at each split
# (from 1 up to every feature in the training matrix).
n_estimators_list = [5, 10, 20]
max_features_list = list(np.arange(1, X_train.shape[1] + 1))

# NOTE: the forests are built without a random_state, so the accuracies
# printed here vary from run to run.
for n_estimators in n_estimators_list:
    for max_features in max_features_list:
        clf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_features=max_features,
        ).fit(X_train, y_train)
        accuracy_train = clf.score(X_train, y_train)
        accuracy_test = clf.score(X_test, y_test)
        print('n_estimators= {}, max_features = {}, accuracy_train = {:.3%}, accuracy_test = {:.3%}'.format(
            n_estimators, max_features, accuracy_train, accuracy_test))

print('Comparing to desicion tree clf')

# Single entropy-based tree, depth-limited and seeded for reproducibility.
max_depth = 3
clf = DecisionTreeClassifier(
    criterion='entropy',
    random_state=10,
    max_depth=max_depth,
).fit(X_train, y_train)
print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))
Add Comment
Please Sign In to add a comment.