Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demonstration of k-nearest-neighbour classification on synthetic 2-D data.
#
# Two non-standard packages are used: FNN for nearest neighbours and plyr for
# split-combine-apply (think map-reduce) style operations.
# Install them once with: install.packages(c("FNN", "plyr"))
# Documentation is available from the R prompt via the question mark,
# e.g. run: ?knn
library(FNN)
library(plyr)

# Number of points drawn for each of the two classes, and the number of
# points (out of all of them) used for training.
n_per_class <- 50
n_train <- 60

# Some random points on the plane to show the interface: one cloud centred on
# y = 1 ("near the top") and one centred on y = -1 ("near the bottom").
# Each call returns a named vector; ldply stacks them into a data frame with
# columns x and y.
neartop <- ldply(seq_len(n_per_class), function(i) {
  c(x = rnorm(1), y = 1 + rnorm(1, sd = 0.5))
})
nearbottom <- ldply(seq_len(n_per_class), function(i) {
  c(x = rnorm(1), y = -1 + rnorm(1, sd = 0.5))
})

# The first n_per_class rows are near the top, the rest are near the bottom.
points <- rbind(neartop, nearbottom)

# The labels are TRUE/FALSE, so the question being asked is
# "is this point in the set near the top?"
points[, "label"] <- seq_len(nrow(points)) <= n_per_class
plot(points$x, points$y)

# Split into training/testing sets by sampling n_train row indices without
# replacement; the sampled rows train the classifier, the rest test it.
train_i <- sample(seq_len(nrow(points)), n_train)
train <- points[train_i, c("x", "y")]
test <- points[-train_i, c("x", "y")]

# k = 1 just assigns each test point the label of its nearest training point.
# (The original call also passed an undefined object `cpt_vecs` as a stray
# positional argument, which made the call fail; it has been removed.)
results <- knn(
  train,
  test,
  cl = points[train_i, "label"],
  k = 1
)

# Fraction (0-1, not a percentage) of test points classified correctly.
# knn() returns a factor while the labels are logical, so both sides are
# compared as character strings ("TRUE"/"FALSE") to make the coercion explicit.
mean(as.character(results) == as.character(points[-train_i, "label"]))
Add Comment
Please, Sign In to add comment