Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn import tree
- from sklearn.model_selection import train_test_split
- import numpy as np
- import ipdb
# --- synthetic dataset: 500 2-D points labelled by a noisy AND rule ---
np.random.seed(0)  # fixed seed so the dataset (and the noise) is reproducible

# 500 samples, two features each, uniform in [0, 1)
X = np.random.random([500, 2])

Y = []
for x_value in X:
    # AND "logic": label is 1 only when BOTH features exceed 0.5
    y_value = 0
    if (x_value[0] > 0.5) and (x_value[1] > 0.5):
        y_value = 1
    # label noise: with probability ~0.1 flip the label
    # (one RNG draw per sample, same call sequence as before)
    if np.random.random() > 0.9:
        y_value = 1 - y_value  # numerically identical to `not y_value`, but stays an int
    Y.append(y_value)
Y = np.array(Y)

feature_names = ['x1', 'x2']
target_names = ['falso', 'verdadeiro']
# Hold out 20% of the samples to evaluate the pruned trees.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Grow the initial (unpruned) tree.
clf = tree.DecisionTreeClassifier()

# Find the "weak links": the effective alpha values at which the
# cost-complexity pruning sequence changes.
path = clf.cost_complexity_pruning_path(X_train, Y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities

# Fit one pruned tree per alpha value.
clfs = []
for ccp_alpha in ccp_alphas:
    clf = tree.DecisionTreeClassifier(random_state=0, ccp_alpha=ccp_alpha)
    clf.fit(X_train, Y_train)
    clfs.append(clf)

# Accuracy of every candidate tree on both splits.
train_scores = [clf.score(X_train, Y_train) for clf in clfs]
test_scores = [clf.score(X_test, Y_test) for clf in clfs]
# NOTE: removed the leftover `ipdb.set_trace()` debugger breakpoint —
# it halts any non-interactive run of this script.
# Choose the best tree: the placeholder ("escolher a melhor / #.") was never
# implemented, so the script used to refit whichever tree the loop left in
# `clf` (the LAST alpha — typically a trivial stump). Pick the candidate with
# the highest held-out accuracy instead; argmax resolves ties in favour of
# the smallest alpha (first maximum).
clf = clfs[int(np.argmax(test_scores))]
clf = clf.fit(X_train, Y_train)

# Export in graphical format (requires the `graphviz` package and binary).
import graphviz

dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    filled=False,
    rounded=True,
    impurity=True,
    class_names=target_names,
    feature_names=feature_names,
)
graph = graphviz.Source(dot_data)
graph.render("graph")  # writes "graph" / "graph.pdf" in the working directory

# Export in text format.
r = tree.export_text(clf)
print('\n' + r)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement