Advertisement
Guest User

Untitled

a guest
Nov 13th, 2019
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.16 KB | None | 0 0
  # Keep only rows whose glucose, mass, pedigree, age and pressure values are
  # all non-zero (presumably a 0 in these columns encodes a missing reading --
  # confirm against the data source).
  df_clean1 <- filter(
    df1,
    glucose != 0, mass != 0, pedigree != 0, age != 0, pressure != 0
  )

  # 80/20 train/test split of the CLEANED data. The indices are drawn from
  # the rows of df_clean1, so both partitions must be taken from df_clean1 as
  # well: indexing df1 with them would bring the filtered-out rows back in and
  # turn the "test" set into every remaining row of df1.
  set.seed(100)
  n.clean <- nrow(df_clean1)
  training.idx <- sample(seq_len(n.clean), size = floor(n.clean * 0.8))
  train1.data <- df_clean1[training.idx, ]
  test1.data <- df_clean1[-training.idx, ]
  # ---- kNN: data preparation ----
  # Work on copies so the unscaled train/test sets stay untouched.
  train1.kNN <- copy(train1.data)
  test1.kNN <- copy(test1.data)

  # Min-max normalisation: maps `x` onto the [0, 1] range spanned by `ref`.
  #   x   numeric vector to rescale.
  #   ref numeric vector supplying the minimum and maximum; defaults to `x`
  #       itself, which reproduces the original one-argument behaviour.
  # Returns a numeric vector the same length as `x`. Values of `x` outside the
  # range of `ref` map outside [0, 1]; a constant `ref` yields NaN/Inf.
  nor <- function(x, ref = x) {
    (x - min(ref)) / (max(ref) - min(ref))
  }

  # Normalise columns 1:8. The test set is rescaled with the TRAINING minima
  # and maxima so both sets live on the same feature scale; scaling the test
  # set by its own range would make kNN distances between train and test rows
  # inconsistent.
  train1.kNN[, 1:8] <- sapply(train1.data[, 1:8], nor)
  test1.kNN[, 1:8] <- mapply(nor, x = test1.data[, 1:8], ref = train1.data[, 1:8])

  # Include only pregnant + glucose + pressure + mass + pedigree (columns
  # 1, 2, 3, 6, 7); column 10 is presumably the class label `y` used below --
  # TODO confirm against the column layout of df1.
  train1.kNN <- train1.kNN[c(1, 2, 3, 6, 7, 10)]
  test1.kNN <- test1.kNN[c(1, 2, 3, 6, 7, 10)]
  # Sweep k over 1..30 and record, for each k, the share of test rows whose
  # predicted class equals the observed `y`.
  knn.train.x <- train1.kNN[, 1:5]
  knn.test.x <- test1.kNN[, 1:5]
  knn.train.y <- train1.kNN$y
  knn.test.y <- test1.kNN$y

  ac <- numeric(30)
  for (i in seq_along(ac)) {
    # Re-seed before every fit so each k starts from the same RNG state
    # (presumably to make any random tie-breaking inside knn reproducible).
    set.seed(123)
    knn.i <- knn(knn.train.x, knn.test.x, cl = knn.train.y, k = i)
    ac[i] <- mean(knn.i == knn.test.y)
  }

  # Accuracy as a function of k (points joined by lines).
  plot(ac, type = "b", xlab = "K", ylab = "Accuracy")
  # Final kNN classifier with k = 30, seeded the same way as in the k sweep.
  set.seed(123)
  knn1 <- knn(train1.kNN[, 1:5], test1.kNN[, 1:5], cl = train1.kNN$y, k = 30)

  # Report test-set accuracy and the raw contingency table of predicted vs.
  # observed classes (both print when run at top level).
  mean(knn1 == test1.kNN$y)
  table(knn1, test1.kNN$y)

  # Build the confusion-matrix summary once: print it, then keep its overall
  # accuracy entry in acc_kNN.
  knn1.cm <- confusionMatrix(knn1, test1.kNN$y)
  knn1.cm
  acc_kNN <- knn1.cm$overall["Accuracy"]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement