Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the prostate-cancer data set and prepare the class label.
library(class)    # provides knn()
library(gmodels)  # provides CrossTable()

# Directory that holds the input file. The path is built explicitly instead
# of calling setwd(), which would change the working directory of the whole
# R session as a side effect.
data_dir <- "~/R/harjottelut/prostate cancer"
prc <- read.csv(
  file.path(data_dir, "Prostate_Cancer.csv"),
  stringsAsFactors = FALSE
)
# str(prc)

# Drop the first column of the file; it is not used by the later steps.
prc <- prc[-1]
# View(prc)
# table(prc$diagnosis_result)

# Recode the diagnosis codes "B"/"M" as a factor with readable labels.
prc$diagnosis_result <- factor(
  prc$diagnosis_result,
  levels = c("B", "M"),
  labels = c("Benign", "Malignant")
)

# factor() silently maps every value outside `levels` to NA, so make that
# visible instead of letting it surface later as a confusing result.
if (anyNA(prc$diagnosis_result)) {
  warning(
    "diagnosis_result contains values other than \"B\"/\"M\"; they are now NA",
    call. = FALSE
  )
}
# round(prop.table(table(prc$diagnosis_result)) * 100, digits = 1)
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x: numeric vector.
#
# Returns a vector of the same length as `x` in which the smallest observed
# value maps to 0 and the largest to 1. Missing values stay missing but no
# longer turn every other element into NA. A vector whose observed values are
# all equal maps to 0 (instead of the NaN produced by 0 / 0). Empty or
# all-missing input is returned unchanged (as numeric).
normalize <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0L) {
    # Nothing to scale; also avoids the min()/max() warnings on empty input.
    return(as.numeric(x))
  }

  lo <- min(observed)
  span <- max(observed) - lo

  # A zero span means a constant vector: divide by 1 so the result is 0
  # rather than NaN.
  if (!is.na(span) && span == 0) {
    span <- 1
  }

  (x - lo) / span
}
# Rescale the feature columns (columns 2 to 9 of prc) with normalize() so
# that every feature contributes on the same scale to the kNN distance.
prc_n <- as.data.frame(lapply(prc[2:9], normalize))
# View(prc_n)
# summary(prc_n$radius)

# The first 65 rows train the classifier; all remaining rows are held out
# for testing. The test range is derived from the actual row count instead
# of a hard-coded 66:100, which yields all-NA rows when the file has fewer
# than 100 rows and silently ignores any rows beyond 100.
n_train <- 65L
stopifnot(nrow(prc_n) > n_train)
train_rows <- seq_len(n_train)
test_rows <- seq(n_train + 1L, nrow(prc_n))

prc_train <- prc_n[train_rows, ]
prc_test <- prc_n[test_rows, ]

# Class labels are taken from column 1 of prc for the same row ranges.
prc_train_labels <- prc[train_rows, 1]
prc_test_labels <- prc[test_rows, 1]

# knn() breaks voting ties at random; fixing the seed makes the predictions
# (and the table below) reproducible from run to run.
set.seed(1)

# k = 12 was the value settled on for this data set.
prc_test_pred <- knn(
  train = prc_train,
  test = prc_test,
  cl = prc_train_labels,
  k = 12
)

# Cross-tabulate true against predicted labels to show the results
# (not perfect, but decent).
CrossTable(x = prc_test_labels, y = prc_test_pred, prop.chisq = FALSE)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement