Guest User

Untitled

a guest
Jul 20th, 2018
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. #I'm using two non-standard packages; one for nearest neighbors (FNN) and one for doing split-combine-apply (think map reduce) type operations.
  2. #you'll have to run this: install.packages(c('FNN', 'plyr'))
  3. #you can also checkout documentation in R using the question mark; run this: ?knn
  4. library(FNN)
  5. library(plyr)
  6.  
  # Simulate two clouds of 50 random points each on the plane, to have some
  # data for demonstrating the knn() interface. Points are drawn one at a
  # time: x from a standard normal, y from a normal with sd 0.5 shifted to
  # +1 (top cloud) or -1 (bottom cloud). ldply() row-binds the named
  # c(x, y) vectors into a data frame with columns x and y.
  neartop <- ldply(seq_len(50), function(idx) {
    x_coord <- rnorm(1)
    y_coord <- 1 + rnorm(1, sd = 0.5)
    c(x = x_coord, y = y_coord)
  })

  nearbottom <- ldply(seq_len(50), function(idx) {
    x_coord <- rnorm(1)
    y_coord <- -1 + rnorm(1, sd = 0.5)
    c(x = x_coord, y = y_coord)
  })
  17.  
  # Stack the two clouds: the rows from neartop come first, followed by the
  # rows from nearbottom.
  points <- rbind(neartop, nearbottom)

  # The vector of classification labels is easy to construct. Since the
  # labels are TRUE/FALSE, the question being asked is "is this point in the
  # set near the top?". The run lengths come from the actual row counts of
  # the two clouds, so the labels stay aligned with the rows even if the
  # cloud sizes are changed.
  points[, "label"] <- rep(
    c(TRUE, FALSE),
    times = c(nrow(neartop), nrow(nearbottom))
  )

  # Scatter plot of all points as a quick visual check of the two clouds.
  plot(points$x, points$y)
  26.  
  # Split into training/testing sets: draw 60 distinct row indices out of
  # 1..100 for training and keep the remaining rows for testing. Only the
  # coordinate columns (x, y) are kept as features; the labels are looked up
  # separately through train_i.
  train_i <- sample.int(100, size = 60)
  train <- points[train_i, c("x", "y")]
  test <- points[-train_i, c("x", "y")]
  31.  
  # Classify every test point by nearest neighbours. knn() receives the
  # training features, the test features, and the class labels of the
  # training rows (cl). No other positional argument is passed: with cl and
  # k given by name, an extra positional value would be bound to the next
  # free formal of knn() instead of being ignored.
  results <- knn(
    train,
    test,
    cl = points[train_i, "label"],
    k = 1 # k = 1 just assigns a point to its nearest neighbor.
  )

  # Proportion of test points whose predicted label equals the true label
  # (a value between 0 and 1; multiply by 100 for a percentage).
  mean(points[-train_i, "label"] == results)
Add Comment
Please, Sign In to add comment