Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
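- t.test - test sprawdzający równość średnich dwóch grup
- jeśli p.value > 0.05, nie ma podstaw do odrzucenia hipotezy zerowej (średnie są równe); jeśli p.value < 0.05, hipotezę zerową odrzucamy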
- t.test:
- tabelka:
- |gen | p.value|
- |-----------------|
- |cos | cos |
library(dplyr)
library(caret)
library(randomForest)
library(ModelMetrics)

# NOTE(review): hard-coded, machine-specific working directory; the relative
# path passed to load() below depends on it.
setwd("F:/pajtoon/lab 11")
load('dataLeukemia.RData')  # the code below expects this to define `data`

# Quick interactive look at the raw data.
View(data[1:100, 1:100])
names(data)[1:10]
unique(data$Leukemia.class)

# Keep only the two classes being compared and drop columns 1 and 3; the code
# below treats column 1 of `set.data` as the class label.
# %in% (unlike ==) never yields NA, so rows with a missing label are dropped
# instead of turning into all-NA rows.
target.classes <- c("AML with normal karyotype + other abnormalities", "CLL")
set.data <- data[data$Leukemia.class %in% target.classes, -c(1, 3)]

# Recode the class label to numbers.
# NOTE(review): this arithmetic presumably relies on the two kept classes
# having factor codes 6 and 12 (giving 0 and 1) - confirm against
# levels(data$Leukemia.class), including which class ends up as 0.
set.data$Leukemia.class <- as.numeric(set.data$Leukemia.class) / 6 - 1
View(set.data[, 1:100])
rm(data)

# Random split: about one third of the rows for testing, the rest for training.
# No seed is set, so the split differs between runs.
v <- seq_along(set.data$Leukemia.class)
index.test <- sample(v, round(length(v) / 3))
# setdiff() stays correct even when index.test is empty (v[-integer(0)] would
# select nothing).
index.train <- setdiff(v, index.test)
data.train <- set.data[index.train, ]
data.test <- set.data[index.test, ]

# Forest on all features, with variable importance.
result <- randomForest(
  x = data.train[, -1],
  y = as.factor(data.train[, 1]),
  importance = TRUE
)
var.imp <- result$importance

# Rank features by importance column 4 (for a two-class forest fitted with
# importance = TRUE this should be MeanDecreaseGini - confirm) and keep the
# names of the top 100; head() avoids NA padding when fewer are available.
var.imp.sorted <- var.imp[order(var.imp[, 4], decreasing = TRUE), ]
var.imp100 <- head(row.names(var.imp.sorted), 100)
View(var.imp.sorted)

# Score the held-out rows. predict.randomForest() takes the new observations
# through `newdata`; an `x =` argument is swallowed by `...` and the call then
# returns out-of-bag predictions for the TRAINING rows. The full model needs
# all of its predictors, not only the top 100.
# AUC is computed from the predicted probability of the second class level.
var.pred.test <- predict(result, newdata = data.test[, -1], type = "prob")[, 2]
print(auc(as.factor(data.test[, 1]), var.pred.test))

# Refit using only the 100 most important features and score again.
result2 <- randomForest(
  x = data.train[, var.imp100],
  y = as.factor(data.train[, 1]),
  importance = TRUE
)
var.pred.test <- predict(
  result2,
  newdata = data.test[, var.imp100],
  type = "prob"
)[, 2]
print(auc(as.factor(data.test[, 1]), var.pred.test))
#####
# Single-gene check: t-test on the first gene column (column 2 of set.data)
# between the samples labelled 0 and the samples labelled 1.
# NOTE(review): the variable names assume 0 = CLL and 1 = AML - confirm
# against the recoding of Leukemia.class.
gene.vac <- set.data[, 1:2]
View(gene.vac)
geneCLL <- gene.vac[gene.vac$Leukemia.class == 0, ]
geneAML <- gene.vac[gene.vac$Leukemia.class == 1, ]

# Compare only the expression column. Passing the whole two-column data frames
# to t.test() would mix the 0/1 class labels into the samples.
p.value <- t.test(geneCLL[[2]], geneAML[[2]])$p.value
p.value
# t-test for an arbitrary gene column

#' Two-sample t-test p-value for one gene column.
#'
#' Splits column `i` of `set.data` by the 0/1 values in its `Leukemia.class`
#' column and compares the two groups with `t.test()`.
#'
#' @param set.data Data frame with a `Leukemia.class` column coded 0/1 and one
#'   numeric column per gene.
#' @param i Index (or name) of the gene column to test.
#' @return The p-value of the test, a single number.
Ttest <- function(set.data, i) {
  # The signature used to be a single parameter named `set.data.i`, while the
  # body read the globals `set.data` and `i`; two-argument calls failed with
  # "unused argument".
  class.label <- set.data$Leukemia.class
  expression <- set.data[[i]]

  # which() drops rows whose label is NA instead of propagating NA indices.
  geneCLL <- expression[which(class.label == 0)]
  geneAML <- expression[which(class.label == 1)]

  # Only the expression values are compared; the label column must not be
  # part of the samples.
  t.test(geneCLL, geneAML)$p.value
}
# Run the per-gene t-test on every column except the class label (column 1).
# The column range is taken from the data instead of a hard-coded 2:54676, and
# vapply() both preallocates the result and guarantees one number per gene.
gene.columns <- seq_len(ncol(set.data))[-1]
pvalue <- vapply(gene.columns, function(i) Ttest(set.data, i), numeric(1))
names(pvalue) <- names(set.data)[gene.columns]

# Gene name / p-value table. Built as a data frame so the p-values stay
# numeric; cbind() of names and numbers yields a character matrix.
data.frame.pvalue <- data.frame(
  gene = names(pvalue),
  p.value = unname(pvalue),
  stringsAsFactors = FALSE
)

# Sort ascending by p-value (most significant genes first).
data.frame.pvalue.order <- data.frame.pvalue[order(data.frame.pvalue$p.value), ]
names(data.frame.pvalue.order)

# Benjamini-Hochberg correction for multiple testing, in the sorted order.
p.value.corect <- p.adjust(data.frame.pvalue.order[, 2], method = "BH")
View(p.value.corect)
library(randomForest)

# `data` is deleted by the earlier rm(data), so reload it when it is missing.
# The check is restricted to the global environment because a plain
# exists("data") is always TRUE (it finds the utils::data function).
if (!exists("data", envir = globalenv(), inherits = FALSE)) {
  load('dataLeukemia.RData')
}

table(data$Leukemia.class)

# Rows belonging to either of the two classes of interest.
# filter() needs the data frame as its first argument, and separate conditions
# are AND-ed, so the two classes are matched with a single %in% test.
target.classes <- c("CLL", "AML with normal karyotype + other abnormalities")
cechy <- dplyr::filter(data, Leukemia.class %in% target.classes)
# Same subset in base R; the comma after the condition selects rows (without
# it the logical vector would be used to pick columns).
data.asd <- data[data$Leukemia.class %in% target.classes, ]

# Random split: about one third of the rows for testing, the rest for training.
# NOTE(review): the split and the model below use the full `data` (all
# classes), not the two-class subsets computed above - confirm this is intended.
rows <- sample.int(nrow(data), size = round(nrow(data) / 3), replace = FALSE)
data.train <- data[-rows, ]
data.test <- data[rows, ]
View(data.test[1:100, 1:100])

# Column 2 is used as the response; every other column is a predictor.
model.rf <- randomForest(
  x = data.train[-2],
  y = data.train[, 2],
  ntree = 100,
  do.trace = 10
)
rf.result <- predict(model.rf, newdata = data.test[, -2])

# Misclassification rate on the test rows.
error.rf1 <- mean(data.test[, 2] != rf.result)
- ###
- gene.vac = set.data[,1:2]
- geneCLL = gene.vac[gene.vac$]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement