Not a member of Pastebin yet? Sign up — it unlocks many cool features!
# Split the dataset into two pieces: a training set and a testing set.
# Train the model on the training set, then test it on the held-out
# testing set and evaluate accuracy — this avoids the optimistic bias of
# evaluating on the same data the model was fit on.

# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# STEP 1: split X and y into training and testing sets.
# X and y are defined earlier (presumably the iris features/labels — the
# shapes below imply 150 samples with 4 features each; confirm upstream).
# test_size=0.4 holds out 40% for testing; random_state=4 makes the
# shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=4
)
print(X_train.shape)  # (90, 4) -- 90 samples randomly selected for training
print(X_test.shape)   # (60, 4) -- 60 samples randomly selected for testing

# STEP 2: train the model on the training set
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

# STEP 3: make predictions on the testing set
y_pred = logreg.predict(X_test)

# Compare actual response values (y_test) with predicted values (y_pred)
print(metrics.accuracy_score(y_test, y_pred))  # 0.95

# Repeat with K-nearest neighbors, k=5
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))  # 0.966666666667

# Repeat with K-nearest neighbors, k=1
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))  # 0.95
Add Comment
Please sign in to add a comment.