# Untitled

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the breast cancer dataset once and unpack the pieces used below:
# feature matrix, target vector, feature names and class names.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
features, labels = cancer.feature_names, cancer.target_names
print('X.shape= ', X.shape)
print('y.shape= ', y.shape)

# A fixed random_state keeps the train/test partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3)
print('X_train.shape= ', X_train.shape)
print('X_test.shape= ', X_test.shape)
print('y_train.shape= ', y_train.shape)
print('y_test.shape= ', y_test.shape)
print('features:', features)

# Map each integer class index to its class name.
dict_names = dict(enumerate(labels))
print('target names :', dict_names)

# Baseline: a random forest with default hyper-parameters.
clf = RandomForestClassifier().fit(X_train, y_train)
# Print the baseline score explicitly: a bare expression statement is
# silently discarded when this file runs as a script.
print('default random forest: accuracy_train = {:.3%}'.format(
    clf.score(X_train, y_train)))

# Hyper-parameter grid: a few forest sizes crossed with every possible
# number of features considered per split (1 .. number of columns).
n_estimators_list = [5, 10, 20]
max_features_list = list(np.arange(1, X_train.shape[1] + 1))

# NOTE: no random_state is passed to the forests, so the reported
# accuracies can differ from run to run.
for n_estimators in n_estimators_list:
    for max_features in max_features_list:
        clf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_features=max_features,
        ).fit(X_train, y_train)
        accuracy_train = clf.score(X_train, y_train)
        accuracy_test = clf.score(X_test, y_test)
        print('n_estimators= {}, max_features = {}, accuracy_train = {:.3%}, accuracy_test = {:.3%}'.format(
            n_estimators, max_features, accuracy_train, accuracy_test))

# Reference point: a single shallow decision tree trained on the same split.
print('Comparing to decision tree clf')
max_depth = 3
clf = DecisionTreeClassifier(
    criterion='entropy',
    random_state=10,  # fixed seed so the fitted tree is reproducible
    max_depth=max_depth,
).fit(X_train, y_train)
print("train accuracy= {:.3%}".format(clf.score(X_train, y_train)))
print("test accuracy= {:.3%}".format(clf.score(X_test, y_test)))