Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Drop rows with physiologically impossible zero values, then make a
# reproducible 80/20 train/test split of the CLEANED data.
df_clean1 <- filter(df1, glucose != 0, mass != 0, pedigree != 0, age != 0, pressure != 0)
set.seed(100)
training.idx <- sample(seq_len(nrow(df_clean1)), size = nrow(df_clean1) * 0.8)
# BUG FIX: the original indexed df1 here, which silently discarded the
# zero-value filtering above — the split must come from df_clean1.
train1.data <- df_clean1[training.idx, ]
test1.data <- df_clean1[-training.idx, ]
- ---
# Work on copies so the raw train/test splits stay untouched.
# NOTE(review): data.table::copy() is unnecessary here — everything below
# modifies these objects only through base `[<-` assignment, so R's
# copy-on-modify semantics already guarantee independent objects. Plain
# assignment also removes the implicit data.table dependency.
train1.kNN <- train1.data
test1.kNN <- test1.data
# Min-max normalization: linearly rescale a numeric vector onto [0, 1].
# Returns NaN for a constant vector (max == min), same as the original.
nor <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Normalize the 8 numeric predictors to [0, 1].
# BUG FIX: the original rescaled the test set with the TEST set's own
# min/max. The test set must be rescaled with the TRAINING set's
# statistics so both sets live on the same scale and no test information
# leaks into preprocessing.
train_min <- vapply(train1.data[, 1:8], min, numeric(1))
train_rng <- vapply(train1.data[, 1:8], max, numeric(1)) - train_min
train1.kNN[, 1:8] <- sweep(sweep(train1.data[, 1:8], 2, train_min, "-"), 2, train_rng, "/")
test1.kNN[, 1:8] <- sweep(sweep(test1.data[, 1:8], 2, train_min, "-"), 2, train_rng, "/")
# Keep only pregnant + glucose + pressure + mass + pedigree (columns
# 1, 2, 3, 6, 7) plus the class label (column 10).
train1.kNN <- train1.kNN[c(1, 2, 3, 6, 7, 10)]
test1.kNN <- test1.kNN[c(1, 2, 3, 6, 7, 10)]
- # Try different k to find the best classifier
# Sweep k = 1..30 and record the test-set accuracy of each kNN fit.
# Reseed before every knn() call so random tie-breaking is reproducible,
# exactly as the original loop did.
ac <- vapply(seq_len(30), function(k) {
  set.seed(123)
  pred <- knn(train1.kNN[, 1:5], test1.kNN[, 1:5], cl = train1.kNN$y, k = k)
  mean(pred == test1.kNN$y)
}, numeric(1))
- # Accuracy plot
- plot(ac, type="b", xlab="K", ylab="Accuracy")
- set.seed(123)
- knn1 = knn(train1.kNN[, 1:5], test1.kNN[, 1:5], cl=train1.kNN$y, k=30)
- mean(knn1 == test1.kNN$y)
- table(knn1, test1.kNN$y)
- confusionMatrix(knn1, test1.kNN$y)
- acc_kNN <- confusionMatrix(knn1, test1.kNN$y )$overall['Accuracy']
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement