Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train a decision tree classifier on data.csv, print its accuracy on a
# held-out test split, and plot the fitted tree.

# Load libraries: pandas for data loading, scikit-learn for the model and
# its evaluation, matplotlib for the plot.
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import metrics  # accuracy calculation
from sklearn import tree  # plot_tree
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Column names to assign to data.csv (the same file used in the previous
# exercise).
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']

# Load the dataset. header=None makes pandas treat the first line of the file
# as data and apply the names above instead.
# NOTE(review): if data.csv actually starts with a header row, that row would
# be read as a data row -- confirm against the file.
pima = pd.read_csv("data.csv", header=None, names=col_names)

# Split the dataset into features and target variable. This time the columns
# are selected by name (the previous exercise split by position: 8 columns on
# the left, 1 on the right). 'skin' is loaded but is not in the feature list.
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = pima[feature_cols]  # Features
y = pima.label  # Target variable

# Split the data into a training set and a test set (not done in the previous
# exercise): 70% training and 30% test, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Choose the classifier. Set USE_PRE_PRUNING to True to limit the tree to
# depth 3 with the entropy criterion; False (the default) grows the full tree.
# random_state is pinned, like the split above, so repeated runs give the same
# tree and the same accuracy.
USE_PRE_PRUNING = False
if USE_PRE_PRUNING:
    clf = DecisionTreeClassifier(criterion="entropy", max_depth=3, random_state=1)
else:
    clf = DecisionTreeClassifier(random_state=1)

# Train the decision tree.
clf = clf.fit(X_train, y_train)

# Predict the response for the test dataset.
y_pred = clf.predict(X_test)

# Model accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred)*100,"%")

# Draw the fitted tree.
# NOTE(review): class_names assumes the label column holds exactly the two
# classes 0 and 1 -- confirm against data.csv.
fig = plt.figure(figsize=(25, 20))
tree.plot_tree(clf, feature_names=feature_cols, class_names=['0', '1'], filled=True)
# Without show() a plain script run exits before the figure is displayed.
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement