Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def DTC(df, target):
    """Fit a decision tree on a train/test split, then tune it by grid search.

    The data is split 70/30 (fixed seed 123). A baseline
    ``DecisionTreeClassifier`` (gini criterion) is fitted on the training
    part and its mean accuracy on the training and test parts is printed.
    A cross-validated grid search over ``max_depth`` (3..19) and
    ``min_samples_leaf`` (2..7) is then run on the training part, and the
    best estimator, best cross-validation score and best parameters are
    printed.

    Args:
        df: Feature matrix accepted by scikit-learn estimators
            (presumably a pandas DataFrame -- confirm against callers).
        target: Class labels aligned row-for-row with ``df``.

    Returns:
        None. All results are reported via ``print``.
    """
    # ``sklearn.grid_search`` was removed in scikit-learn 0.20; both the
    # splitter and the grid search live in ``sklearn.model_selection``.
    # Importing them here also removes the dependency on an ``ms`` alias
    # that this function never defined.
    from sklearn.model_selection import GridSearchCV, train_test_split
    from sklearn.tree import DecisionTreeClassifier

    # Splitting the data into train and test
    x_train, x_test, y_train, y_test = train_test_split(
        df, target, test_size=0.3, random_state=123
    )

    # Baseline tree with no depth limit, used as a reference point for the
    # tuned model below.
    dtc = DecisionTreeClassifier(
        criterion='gini', min_samples_split=2, random_state=123
    )
    dtc.fit(x_train, y_train)
    print(dtc.score(x_train, y_train))
    print(dtc.score(x_test, y_test))

    # Parameter tuning using grid search (GridSearchCV's default
    # cross-validation and scoring are used).
    parameters = {
        'max_depth': list(range(3, 20)),
        'min_samples_leaf': list(range(2, 8)),
    }
    dtc_cv = GridSearchCV(DecisionTreeClassifier(random_state=123), parameters)
    dtc_cv.fit(x_train, y_train)
    tree_model = dtc_cv.best_estimator_

    print("\n Tuning with Grid Search \n")
    print(tree_model)
    print("\n best score \n", dtc_cv.best_score_,
          "\n best params \n", dtc_cv.best_params_)
Add Comment
Please, Sign In to add comment