Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Classify the Origin of cars in the Cars93 dataset (MASS package) with
# k-nearest neighbours (KNN), using the cars' numeric characteristics as
# predictors, and evaluate the predictions on 25 held-out cars.

library(MASS) # Cars93 dataset
library(class) # knn function
library(gmodels) # CrossTable function

# NOTE: deliberately no rm(list = ls()) here -- a script should not wipe the
# workspace of whoever runs or sources it.

# fix the RNG so the hold-out sample (and knn's random tie-breaking) is
# reproducible
set.seed(8675309)

data(Cars93)

# remove incomplete rows
cars <- Cars93[complete.cases(Cars93), ]

# hold out 25 random observations
holdOutSize <- 25L
stopifnot(nrow(cars) > holdOutSize)
holdOutIndex <- sample(seq_len(nrow(cars)), holdOutSize, replace = FALSE)

# flag the numeric variables; only these are used as KNN features
numerics <- vapply(cars, is.numeric, logical(1))
features <- cars[numerics]

# separate the labels
target <- cars$Origin
trainingTarget <- target[-holdOutIndex]
testingTarget <- target[holdOutIndex]

# rescale every feature to z-scores. The centre and spread are estimated on
# the training rows only and then applied to all rows, so no information from
# the held-out cars leaks into the preprocessing.
trainingFeatures <- features[-holdOutIndex, , drop = FALSE]
trainingMeans <- vapply(trainingFeatures, mean, numeric(1))
trainingSds <- vapply(trainingFeatures, sd, numeric(1))
scaledCars <- as.data.frame(
  scale(features, center = trainingMeans, scale = trainingSds)
)

# compare pre and post scaling (print() so the summaries also show up when
# the script is run through source())
print(summary(cars))
print(summary(scaledCars))

# build back the training and testing dataset
training <- scaledCars[-holdOutIndex, , drop = FALSE]
testing <- scaledCars[holdOutIndex, , drop = FALSE]

# run the model [default to k = sqrt(nobs) ~ 9]
k <- 9
knnPredictions <- knn(
  train = training, test = testing, cl = trainingTarget, k = k
)
CrossTable(testingTarget, knnPredictions, prop.chisq = FALSE)
# A previous run of this script (when the scaling used all rows, hold-out
# included) reported 8 true positives, 12 true negatives, 2 false negatives
# and 3 false positives; re-run to confirm the counts with training-only
# scaling.
Add Comment
Please, Sign In to add comment