Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Train and evaluate a decision-tree classifier on the Pima diabetes dataset.
# Load libraries
import pandas as pd  # needed for read_csv below (was missing in the original)
from sklearn.tree import DecisionTreeClassifier  # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split  # Import train_test_split function
from sklearn import metrics  # Import scikit-learn metrics module for accuracy calculation

# Column names for the headerless CSV (8 features + class label).
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
# load dataset
pima = pd.read_csv("diabetes.csv", header=None, names=col_names)

# split dataset in features and target variable
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = pima[feature_cols]  # Features
y = pima.label  # Target variable

# Split dataset into training set and test set (70% training, 30% test).
# random_state fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create Decision Tree classifier object
clf = DecisionTreeClassifier()
# Train Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# Predict the response for the held-out test dataset.
# (The original predicted on X_train and scored against y_train, which
# reports training accuracy and overstates real performance.)
y_pred = clf.predict(X_test)
print("Prediction: {}".format(y_pred))

# Model Accuracy on unseen data: how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
And here is the same pipeline using a Gaussian Naive Bayes classifier instead:
# Train and evaluate a Gaussian Naive Bayes classifier on the Pima diabetes dataset.
# Load libraries
import pandas as pd
from sklearn.model_selection import train_test_split  # Import train_test_split function
from sklearn import metrics  # Import scikit-learn metrics module for accuracy calculation
# Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB

# Column names for the headerless CSV (8 features + class label).
col_names = ['pregnant', 'glucose', 'bp', 'skin', 'insulin', 'bmi', 'pedigree', 'age', 'label']
# load dataset
pima = pd.read_csv("diabetes.csv", header=None, names=col_names)

# split dataset in features and target variable
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = pima[feature_cols]  # Features
y = pima.label  # Target variable

# Split dataset into training set and test set (70% training, 30% test).
# random_state fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create a Gaussian Classifier
model = GaussianNB()
# Train Gaussian Classifier
model = model.fit(X_train, y_train)

# Predict the response for the held-out test dataset.
# (The original computed an unused prediction on X_train; a single
# test-set prediction is what the printed output actually needs.)
y_pred = model.predict(X_test)
print("Prediction: {}".format(y_pred))

# Model Accuracy on unseen data, reported for parity with the
# decision-tree version above.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement