Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- library(functional)
load_abalones_data <- function(path, standarize=FALSE, columns) {
  #' Read the abalone data file and assemble selected columns into a matrix.
  #'
  #' @param path: location of the data file; passed straight to read.csv.
  #' @param standarize: when TRUE, every selected column is run through
  #'   standarize_vector before it is appended.
  #' @param columns: the columns to extract, in the order they should appear.
  #' @returns a matrix whose first column is dicotomize_abalones applied to
  #'   the Sex column, followed by one column per entry of `columns`.
  abalones <- read.csv(path)

  # Accumulate the requested columns left to right; NULL is a no-op for cbind.
  features <- NULL
  for (col_index in columns) {
    # NOTE: cbind labels plain-vector arguments with the variable name, so the
    # name `col_to_add` is kept on purpose to leave column labels unchanged.
    col_to_add <- as.matrix(abalones[, col_index])
    if (standarize) col_to_add <- standarize_vector(col_to_add)
    features <- cbind(features, col_to_add)
  }

  labels <- as.matrix(dicotomize_abalones(abalones$Sex))
  cbind(labels, features)
}
# Logistic sigmoid: maps z (scalar or vector, applied elementwise) to
# 1 / (1 + e^(-z)).
sigmoid <- function(z) {
  denominator <- 1 + exp(-z)
  1 / denominator
}
dicotomize_abalones <- function(data) {
  #' Turn abalone sex labels into a 0/1 indicator.
  #'
  #' @param data: vector (character or factor) of sex labels.
  #' @returns an unnamed numeric vector of the same length as `data`:
  #'   1 where the label is 'M' or 'F', 0 for anything else (including NA).
  #'   Empty input yields numeric(0).
  # %in% is already vectorised and never returns NA, so no per-element
  # mapping is needed.
  as.numeric(data %in% c("M", "F"))
}
standarize_vector <- function(v) {
  #' Standardise values to zero mean and unit standard deviation.
  #'
  #' @param v: numeric vector (a one-column matrix is also accepted).
  #' @returns a plain numeric vector of (v - mean(v)) / sd(v); names of `v`
  #'   are kept, dimensions are dropped. Empty input yields numeric(0).
  #'   If sd(v) is 0 or NA the result is NaN/NA, as plain division gives.
  # Compute the centre and spread once instead of once per element.
  centre <- mean(v)
  spread <- sd(v)

  # as.vector drops dim (and names), so names are restored explicitly to keep
  # the plain-vector return shape callers already receive.
  standardized <- (as.vector(v) - centre) / spread
  names(standardized) <- names(v)
  standardized
}
norm_euclidean <- function(x, y) {
  #' Euclidean distance between two points.
  #'
  #' @param x: numeric coordinates of the first point.
  #' @param y: numeric coordinates of the second point.
  #' @returns the square root of the summed squared coordinate differences.
  squared_gaps <- (x - y)^2
  sqrt(sum(squared_gaps))
}
find_knn_2 <- function(data, target, k) {
  #' Find the k nearest neighbours of target (Euclidean distance).
  #'
  #' @param data: a matrix with one point per row (documented as N x 2).
  #' @param target: the point whose neighbours are wanted.
  #' @param k: the K in KNN; must satisfy 1 <= k <= nrow(data).
  #' @returns a k x 2 matrix: column 1 holds row indexes into `data`,
  #'   column 2 the distance from that row to `target`. Rows are in slot
  #'   order, not sorted by distance.
  n_points <- nrow(data)
  if (is.null(n_points) || k < 1 || k > n_points) {
    stop("k must be between 1 and nrow(data)", call. = FALSE)
  }

  distance_to_target <- function(i) norm_euclidean(data[i, ], target)

  # Seed the candidate set with the first k rows.
  seed_indexes <- seq_len(k)
  seed_norms <- vapply(seed_indexes, distance_to_target, numeric(1))
  knn_result <- matrix(c(seed_indexes, seed_norms), nrow = k, ncol = 2)

  # Slot of the current farthest candidate. which.max always yields a single
  # slot, so ties for the largest distance cannot corrupt the comparison.
  worst_slot <- which.max(knn_result[, 2])

  # seq_len keeps this empty when k == nrow(data); (k+1):nrow(data) would
  # count downwards and index past the last row.
  for (i in seq_len(n_points - k) + k) {
    norm_i <- distance_to_target(i)
    if (norm_i < knn_result[worst_slot, 2]) {
      # Evict the farthest candidate, then find the new farthest one.
      knn_result[worst_slot, ] <- c(i, norm_i)
      worst_slot <- which.max(knn_result[, 2])
    }
  }
  return(knn_result)
}
knn_classification <- function(knns_class, factors) {
  #' Majority vote between two candidate classes.
  #'
  #' @param knns_class: class labels of the nearest neighbours.
  #' @param factors: the two candidate classes; only the first two entries
  #'   are consulted.
  #' @returns factors[1] when it occurs strictly more often than factors[2]
  #'   in `knns_class`; otherwise factors[2] (so ties go to factors[2]).
  votes_first <- sum(knns_class == factors[1])
  votes_second <- sum(knns_class == factors[2])
  if (votes_first > votes_second) factors[1] else factors[2]
}
Add Comment
Please, Sign In to add comment