Advertisement
Guest User

Untitled

a guest
Dec 13th, 2017
47
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.58 KB | None | 0 0
  # k-fold cross-validation of a k-nearest-neighbour classifier ----
  #
  # Reads the training data, assigns every row to one of `kfoldsk` folds,
  # prints the validation accuracy of each fold for every candidate k, and
  # finally prints a confusion matrix for k = 3 built from all folds.

  # knn() is provided by the `class` package
  library(class)

  # read in the data with no column headers
  theData <- read.csv("40124332_train.csv", header = FALSE)

  # we are using 5 folds for this cross validation
  kfoldsk <- 5

  # names for the label column (f1) and the four feature columns (f2..f5)
  column_names <- c("f1", "f2", "f3", "f4", "f5")
  stopifnot(
    ncol(theData) >= length(column_names),
    nrow(theData) >= kfoldsk
  )

  # Name the columns BEFORE the fold column is added: assigning five names
  # to a data frame that already holds `folds` would wipe that column's name
  # and make every later `theData$folds` lookup return NULL.
  colnames(theData)[seq_along(column_names)] <- column_names

  # shuffle the rows so the folds are random
  # (call set.seed() before this line if reproducible folds are needed)
  theData <- theData[sample(nrow(theData)), ]

  # cut data into `kfoldsk` folds of (almost) equal size
  theData$folds <- cut(
    seq_len(nrow(theData)),
    breaks = kfoldsk,
    labels = FALSE
  )

  # the four feature columns used for the distance computation
  features <- c("f2", "f3", "f4", "f5")

  # the k values
  knnk <- c(1, 3, 5, 9, 17, 33)

  # loop over the folds for every k value
  for (i in knnk) {
    print("K:")
    print(i)
    for (j in seq_len(kfoldsk)) {
      # split data into training and validation sets
      in_fold <- theData$folds == j
      train_items <- theData[!in_fold, ]
      validation_items <- theData[in_fold, ]

      # fit knn on the training rows and predict the label (first column,
      # f1) of every validation row
      predictions <- knn(
        train_items[, features],
        validation_items[, features],
        train_items$f1,
        k = i
      )

      # fraction of validation rows whose predicted label matches
      acc_rate <- mean(predictions == validation_items$f1)
      print(acc_rate)
    }
  }

  # obtain the most confusable digits from a k value of 3
  #
  # Every row is predicted exactly once (while its fold is held out), so the
  # predictions of all folds are collected; keeping only the last fold's
  # predictions would discard most of the data.
  cv_predictions <- character(nrow(theData))
  for (j in seq_len(kfoldsk)) {
    in_fold <- theData$folds == j
    fold_predictions <- knn(
      theData[!in_fold, features],
      theData[in_fold, features],
      theData$f1[!in_fold],
      k = 3
    )
    cv_predictions[in_fold] <- as.character(fold_predictions)
  }

  # shared levels give a square matrix, one row and one column per label
  # (10x10 if the labels are the digits 0-9)
  label_levels <- sort(unique(theData$f1))

  # prints the matrix of guesses: rows are predictions, columns true labels
  print(table(
    predicted = factor(cv_predictions, levels = label_levels),
    actual = factor(theData$f1, levels = label_levels)
  ))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement