Advertisement
Guest User

Untitled

a guest
Dec 6th, 2016
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.87 KB | None | 0 0
  # Load the prostate-cancer data set and prepare the class label.
  #
  # The CSV is addressed by an explicit path instead of calling setwd(), so
  # running the script does not change the session's working directory.
  library(class)    # provides knn()
  library(gmodels)  # provides CrossTable()

  data_dir <- "~/R/harjottelut/prostate cancer"
  prc <- read.csv(
    file.path(data_dir, "Prostate_Cancer.csv"),
    stringsAsFactors = FALSE
  )
  # str(prc)

  # Drop the first column (presumably a row identifier -- confirm against the
  # CSV). After this, column 1 is the label and the rest are features.
  prc <- prc[-1]
  # View(prc)
  # table(prc$diagnosis_result)

  # Recode the diagnosis codes "B"/"M" as a labelled factor.
  prc$diagnosis_result <- factor(
    prc$diagnosis_result,
    levels = c("B", "M"),
    labels = c("Benign", "Malignant")
  )
  # factor() maps any value outside `levels` to NA without complaint; fail
  # here with a clear message instead of carrying NA labels forward.
  if (anyNA(prc$diagnosis_result)) {
    stop(
      "diagnosis_result contains values other than 'B' and 'M'",
      call. = FALSE
    )
  }
  # round(prop.table(table(prc$diagnosis)) * 100, digits = 1)
  12.  
  # Min-max rescale a numeric vector to the interval [0, 1].
  #
  # Args:
  #   x:     Numeric vector.
  #   na.rm: If TRUE, NA values are ignored when finding the range and stay
  #          NA in the result. The default (FALSE) keeps the earlier
  #          behaviour: any NA in `x` makes the whole result NA.
  #
  # Returns:
  #   A double vector the same length as `x`. A constant vector maps to all
  #   zeros rather than NaN (0 / 0); an empty vector returns numeric(0).
  normalize <- function(x, na.rm = FALSE) {
    if (length(x) == 0L) {
      return(numeric(0))
    }

    rng <- range(x, na.rm = na.rm)
    span <- rng[2] - rng[1]

    # isTRUE() keeps an NA span (NA present, na.rm = FALSE) on the general
    # path below, where it propagates NA exactly as before.
    if (isTRUE(span == 0)) {
      # `/ 1` makes the result a double, matching the general branch.
      return((x - rng[1]) / 1)
    }

    (x - rng[1]) / span
  }
  16.  
  # Rescale the feature columns (2 to 9; column 1 holds the label) with
  # normalize() and collect the results in a data frame.
  prc_n <- as.data.frame(lapply(prc[2:9], normalize))
  # View(prc_n)
  # summary(prc_n$radius)

  # Ordered train/test split: the first 65 rows train the model and every
  # remaining row is held out for testing. The test range is derived from
  # the actual row count, so a file with fewer than 100 rows no longer
  # yields all-NA test rows and rows past 100 are no longer dropped.
  n_train <- 65L
  n_total <- nrow(prc_n)
  stopifnot(n_total > n_train)

  train_rows <- seq_len(n_train)
  test_rows <- seq(n_train + 1L, n_total)

  prc_train <- prc_n[train_rows, ]
  prc_test <- prc_n[test_rows, ]

  # Labels come from column 1 of the un-normalized data, same row ranges.
  prc_train_labels <- prc[train_rows, 1]
  prc_test_labels <- prc[test_rows, 1]
  26.  
  # Predict a label for each test row with k-nearest-neighbours.
  # k = 12 is the value the author settled on.
  prc_test_pred <- knn(
    train = prc_train,
    test = prc_test,
    cl = prc_train_labels,
    k = 12
  )

  # Cross-tabulate actual vs. predicted labels (chi-square contributions
  # omitted). Author's note: the result is not perfect, but decent.
  CrossTable(
    x = prc_test_labels,
    y = prc_test_pred,
    prop.chisq = FALSE
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement