Advertisement
Guest User

Untitled

a guest
May 20th, 2019
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.97 KB | None | 0 0
  # t.test - tests whether the means of two groups differ.
  # If p.value > 0.05, the null hypothesis (equal means) is not rejected.
  #
  # Run t.test for every gene and collect the results in a table:
  #         | gene   | p.value |
  #         |--------|---------|
  #         | ...    | ...     |
  #
  9.  
  10.  
  11.  
  12.  
  13.  
  14.  
  15.  
  library(dplyr)
  library(caret)
  library(randomForest)
  library(ModelMetrics)

  # NOTE(review): hard-coded, machine-specific working directory; the relative
  # path given to load() below is resolved against it.
  setwd("F:/pajtoon/lab 11")
  # The code below expects this file to provide a data frame named `data`.
  load('dataLeukemia.RData')

  # Interactive look at the raw data.
  View(data[1:100, 1:100])
  names(data)[1:10]
  unique(data$Leukemia.class)

  # Keep only the rows of the two classes of interest and drop columns 1 and 3.
  # Presumably this leaves Leukemia.class as the first column (it is used as the
  # response via [, 1] further down) -- confirm with names(set.data)[1].
  keep.rows <- data$Leukemia.class == "AML with normal karyotype + other abnormalities" |
    data$Leukemia.class == "CLL"
  set.data <- data[keep.rows, -c(1, 3)]

  # Recode the class from its numeric code: code / 6 - 1.
  # NOTE(review): assumes Leukemia.class is a factor whose two kept levels have
  # codes 6 and 12, giving 0 and 1 -- confirm with levels(), including which
  # class ends up as 0.
  set.data$Leukemia.class <- as.numeric(set.data$Leukemia.class) / 6 - 1
  View(set.data[, 1:100])
  rm(data)

  # Random split: one third of the rows for testing, the rest for training.
  n.obs <- nrow(set.data)
  v <- seq_len(n.obs)  # seq_len() stays empty for zero rows, unlike 1:n

  index.test <- sample(v, round(n.obs / 3))
  # setdiff() instead of v[-index.test]: a negative index built from an empty
  # vector selects nothing, which would leave the training set empty.
  index.train <- setdiff(v, index.test)

  data.train <- set.data[index.train, ]
  data.test <- set.data[index.test, ]
  38.  
  # Random forest on all features; column 1 of the split sets is used as the
  # class label.
  result <- randomForest(x = data.train[, -1], y = as.factor(data.train[, 1]),
                         importance = TRUE)
  var.imp <- result$importance  # View(var.imp)

  # Rank features by column 4 of the importance matrix (presumably
  # MeanDecreaseGini for a two-class forest -- confirm with colnames(var.imp))
  # and keep the names of the top 100.
  var.imp.sorted <- var.imp[order(var.imp[, 4], decreasing = TRUE), ]
  var.imp100 <- row.names(var.imp.sorted)[1:100]
  View(var.imp.sorted)

  # predict() takes new observations through `newdata`. An `x =` argument is
  # absorbed by `...`, and predict() then returns the out-of-bag predictions of
  # the training rows, which do not line up with the test labels.
  # The full model was trained on every feature, so it is given every feature.
  # type = "prob" returns per-class vote fractions; column 2 belongs to the
  # second level of the response (presumably class 1) and serves as the score,
  # so the AUC is computed from a ranking rather than from hard labels.
  var.pred.test <- predict(result, newdata = data.test[, -1], type = "prob")[, 2]
  print(auc(as.factor(data.test[, 1]), var.pred.test))

  # Refit using only the 100 top-ranked features and evaluate the same way.
  result2 <- randomForest(x = data.train[, var.imp100],
                          y = as.factor(data.train[, 1]), importance = TRUE)
  var.pred.test <- predict(result2, newdata = data.test[, var.imp100],
                           type = "prob")[, 2]
  print(auc(as.factor(data.test[, 1]), var.pred.test))
  49.  
  50.  
  51.  
  52. #####
  # Two-sample t-test for the first gene column (column 2 of set.data).
  gene.vac <- set.data[, 1:2]
  View(gene.vac)
  # Extract only the expression values (column 2). Passing the two-column data
  # frames to t.test() would pool the 0/1 class label into each sample.
  # NOTE(review): the CLL/AML names follow the original script; which class is
  # actually coded 0 depends on the recoding of Leukemia.class -- confirm.
  geneCLL <- gene.vac[gene.vac$Leukemia.class == 0, 2]
  geneAML <- gene.vac[gene.vac$Leukemia.class == 1, 2]
  p.value <- t.test(geneCLL, geneAML)$p.value
  p.value
  # t-test for all columns: helper returning the p-value for one column.
  #
  # Arguments:
  #   set.data  data frame with a Leukemia.class column coded 0 / 1
  #   i         index (or name) of the column to test
  # Returns the p-value of a two-sample t.test() comparing the values of
  # column i between the rows with class 0 and the rows with class 1.
  Ttest <- function(set.data, i) {
    # Work on the arguments rather than on globals, and pass plain vectors so
    # that the class label itself is not pooled into the samples.
    values <- set.data[[i]]
    class0 <- values[set.data$Leukemia.class == 0]
    class1 <- values[set.data$Leukemia.class == 1]
    t.test(class0, class1)$p.value
  }
  66.  
  # Run Ttest() on every column except the first (the class label). The column
  # range is derived from the data instead of the hard-coded 54676.
  gene.cols <- seq_len(ncol(set.data))[-1]
  pvalue <- vapply(gene.cols, function(i) Ttest(set.data, i), numeric(1))

  # Table of column name and p-value. A data.frame keeps the p-values numeric;
  # cbind() of names and numbers yields a character matrix, and on R < 4.0
  # as.data.frame() turns that into factors, so p.adjust() would have adjusted
  # the factor level codes instead of the p-values.
  data.frame.pvalue <- data.frame(gene = names(set.data)[gene.cols],
                                  p.value = unlist(pvalue),
                                  stringsAsFactors = FALSE)
  data.frame.pvalue.order <-
    data.frame.pvalue[order(data.frame.pvalue$p.value, decreasing = FALSE), ]
  names(data.frame.pvalue.order)

  # Benjamini-Hochberg correction of the sorted p-values.
  p.value.corect <- p.adjust(data.frame.pvalue.order[, 2], method = "BH")
  View(p.value.corect)
  78.  
  79.  
  80.  
  81.  
  82.  
  83.  
  84.  
  85.  
  86.  
  87.  
  88.  
  89.  
  90.  
  # Second experiment: random forest on the full data set.
  # `data` was removed earlier with rm(data), so it is loaded again here.
  load('dataLeukemia.RData')
  table(data$Leukemia.class)

  # Rows of the two classes of interest. filter() needs the data frame as its
  # first argument, and comma-separated conditions are AND-ed, which can never
  # hold for two different class names -- hence the %in% test.
  two.classes <- c("CLL", "AML with normal karyotype + other abnormalities")
  cechy <- dplyr::filter(data, Leukemia.class %in% two.classes)

  # Trailing comma added: without it the logical vector selects columns.
  data.asd <- data[data$Leukemia.class %in% two.classes, ]

  # Random split: one third of the rows for testing, the rest for training.
  # NOTE(review): this overwrites data.train / data.test created earlier.
  rows <- sample.int(nrow(data), size = round(nrow(data) / 3), replace = FALSE)
  # setdiff() avoids the empty negative index that would select no rows.
  data.train <- data[setdiff(seq_len(nrow(data)), rows), ]
  data.test <- data[rows, ]

  View(data.test[1:100, 1:100])

  library(randomForest)
  # Column 2 is used as the response, all remaining columns as predictors.
  # NOTE(review): the predictors include columns 1 and 3, which the earlier
  # part of the script drops -- confirm they are meant to be features.
  model.rf <- randomForest(x = data.train[-2], y = data.train[, 2],
                           ntree = 100, do.trace = 10)

  rf.result <- predict(model.rf, newdata = data.test[, -2])
  # Share of test rows whose predicted class differs from the true class.
  error.rf1 <- sum(data.test[, 2] != rf.result) / length(rf.result)
  108.  
  109.  
  110. ###
  111. gene.vac = set.data[,1:2]
  112. geneCLL = gene.vac[gene.vac$]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement