Guest User

Untitled

a guest
Jul 22nd, 2018
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.37 KB | None | 0 0
  1. library(functional)
  2.  
  3.  
  load_abalones_data <- function(path, standarize=FALSE, columns) {
    #' Load the abalone dataset and return the class label plus the chosen
    #' feature columns as a single matrix.
    #'
    #' @param path: path to the abalone CSV file. It is read with read.csv()
    #'   defaults, so it needs a header row that includes a `Sex` column.
    #'   NOTE(review): confirm the file on disk actually has that header.
    #' @param standarize: if TRUE, each selected column is passed through
    #'   standarize_vector() (centered on its mean, divided by its sd).
    #' @param columns: column indexes or names to use as features.
    #' @returns a matrix with one row per record: the first column is the 0/1
    #'   label from dicotomize_abalones(data$Sex) (1 for 'M'/'F', 0 otherwise),
    #'   followed by the selected columns in the order given by `columns`.

    data <- read.csv(path)

    # Build every feature column first and bind them once, instead of growing
    # the matrix with cbind() on each iteration.
    feature_columns <- lapply(columns, function(col_index) {
      column <- as.matrix(data[, col_index])
      if (standarize) {
        column <- standarize_vector(column)
      }
      column
    })
    # unname() keeps names carried by `columns` from being read as cbind args.
    X <- do.call(cbind, unname(feature_columns))

    Y <- as.matrix(dicotomize_abalones(data$Sex))

    cbind(Y, X)
  }
  28.  
  # Logistic sigmoid: 1 / (1 + e^(-z)), applied element-wise to z.
  sigmoid <- function(z) {
    denominator <- 1 + exp(-z)
    1 / denominator
  }
  33.  
  34.  
  dicotomize_abalones <- function(data) {
    #' Encode abalone sex labels as a 0/1 indicator.
    #'
    #' @param data: vector (character or factor) of sex labels.
    #' @returns an unnamed numeric vector with one value per element of data:
    #'   1 where the label is 'M' or 'F', 0 for anything else (including NA).
    #'   Empty input gives numeric(0).

    # %in% is already vectorized and never yields NA, so no per-element
    # mapping is needed.
    as.numeric(data %in% c('M', 'F'))
  }
  38.  
  39.  
  standarize_vector <- function(v) {
    #' Standardize a numeric vector: subtract its mean and divide by its
    #' standard deviation.
    #'
    #' @param v: numeric vector. A matrix is accepted and is flattened.
    #' @returns a plain numeric vector (no dim attribute) holding
    #'   (v - mean(v)) / sd(v) for every element of v. If sd(v) is 0 or NA
    #'   (constant or single-element input) the result is NaN/NA.
    #'   Empty input gives numeric(0).

    # c() drops any dim attribute so callers always get a plain vector.
    values <- c(v)

    # mean() and sd() are evaluated once instead of once per element.
    (values - mean(values)) / sd(values)
  }
  43.  
  44.  
  norm_euclidean <- function(x, y) {
    #' Euclidean distance between x and y.
    #'
    #' @param x: numeric coordinates of the first point.
    #' @param y: numeric coordinates of the second point.
    #' @returns the square root of the summed squared coordinate differences.
    squared_gaps <- (x - y)^2
    total <- sum(squared_gaps)
    sqrt(total)
  }
  48.  
  49.  
  find_knn_2 <- function(data, target, k) {
    #' Find the k nearest neighbours of target among the rows of data.
    #'
    #' @param data: matrix with one point per row (the original note describes
    #'   it as N x 2, i.e. (x, y) pairs).
    #' @param target: the point whose neighbours are wanted; compared against
    #'   each row with norm_euclidean().
    #' @param k: the K in KNN; must satisfy 1 <= k <= nrow(data).
    #' @returns a k x 2 matrix: column 1 holds the row index in data, column 2
    #'   the distance from that row to target. Rows are NOT sorted by distance.

    n_points <- nrow(data)
    if (k < 1 || k > n_points) {
      stop("k must be between 1 and nrow(data)", call. = FALSE)
    }

    distance_to_target <- function(i) norm_euclidean(data[i, ], target)

    # Seed the result with the first k rows.
    knn_indexes <- seq_len(k)
    knn_norms <- vapply(knn_indexes, distance_to_target, numeric(1))
    knn_result <- matrix(c(knn_indexes, knn_norms), nrow = k, ncol = 2)

    # Slot of the current farthest neighbour. which.max() always yields a
    # single slot, so tied distances cannot corrupt the comparison below.
    worst_slot <- which.max(knn_result[, 2])

    # seq_len() yields an empty sequence when k == nrow(data), so the loop is
    # skipped instead of counting down past the last row.
    for (i in seq_len(n_points - k) + k) {
      norm_i <- distance_to_target(i)

      if (norm_i < knn_result[worst_slot, 2]) {
        # Evict the farthest neighbour and find the new farthest one.
        knn_result[worst_slot, ] <- c(i, norm_i)
        worst_slot <- which.max(knn_result[, 2])
      }
    }

    knn_result
  }
  83.  
  84.  
  knn_classification <- function(knns_class, factors) {
    #' Majority vote between two classes.
    #'
    #' @param knns_class: class labels of the nearest neighbours.
    #' @param factors: the two candidate classes; only the first two elements
    #'   are used.
    #' @returns factors[1] when it appears strictly more often in knns_class
    #'   than factors[2]; otherwise (including ties) factors[2].
    votes_first <- sum(knns_class == factors[1])
    votes_second <- sum(knns_class == factors[2])

    if (votes_first > votes_second) factors[1] else factors[2]
  }
Add Comment
Please, Sign In to add comment