Advertisement
Guest User

Untitled

a guest
Jun 25th, 2018
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.26 KB | None | 0 0
  1. library(dplyr)
  2. library(caret)
  3. library(randomForest)
  4.  
  5. #x<-sample.int(sizeData1, size= sizeData1, replace=FALSE)
  6. #dataOneClass1<-dataOneClass1[index.permut1,]
  7. #folds1 <-cut(seq(1,length(dataOneClass1[,1])),breaks=k_cross,labels=FALSE)
  # Restore the objects stored in the .RData file. The rest of the script
  # expects one of them to be named `data` (TODO confirm against the file).
  load("dataLeukemia.RData")
  dim(data)

  # Drop the 1st and 3rd columns, then inspect the top-left corner of the table.
  data <- data[, -c(1, 3)]
  View(data[seq_len(50), seq_len(10)])

  # Number of samples per leukemia class.
  table(data$Leukemia.class)

  # Keep only the two classes under study.
  # Author's note on class sizes: call-198 , MDS - 193
  data0 <- data[
    data$Leukemia.class == "c-ALL/Pre-B-ALL without t(9;22)" |
      data$Leukemia.class == "MDS",
  ]

  dim(data0)

  # Replace the class labels by their numeric representation and show them.
  # NOTE(review): if Leukemia.class is a factor these are level codes of the
  # full factor, not 0/1 -- confirm before relying on specific values.
  data0$Leukemia.class <- as.numeric(data0$Leukemia.class)
  print(data0$Leukemia.class)
  View(data0)
  22.  
  # Three hand-picked cross-validation folds over the 391 rows of the filtered
  # data set. Each test fold joins one row range from 1:198 with one row range
  # from 199:391; the matching training fold is every remaining row.
  index_all <- seq_len(391)
  index_test <- list(
    c(1:66, 199:262),
    c(67:132, 263:327),
    c(133:198, 328:391)
  )
  index_train <- lapply(index_test, function(held_out) index_all[-held_out])
  31.  
  # A bare `t.test()` call is deliberately not made here: `x` has no default,
  # so the call always fails and stops a `source()` run, and its result was
  # never used by the rest of the script.

  # Split the filtered table into the label vector (first column, presumably
  # Leukemia.class -- TODO confirm) and the remaining columns.
  # NOTE: `class` shadows base::class() as a variable name; it is kept because
  # later statements refer to it by this name.
  class <- data0[, 1]
  data_genes <- data0[, -1]
  37.  
  # Two-sample t-test p-value for one gene.
  #
  # Compares the values of column `m` of `data_genes` between the two groups
  # defined by `class` (two-sided, equal variances assumed).
  #
  # Arguments:
  #   m          - column index into `data_genes` (1 .. ncol(data_genes)).
  #   data_genes - data frame or matrix with one row per sample and one
  #                column per gene.
  #   class      - vector of sample labels, one per row of `data_genes`,
  #                containing exactly two distinct non-missing values.
  # Returns: the p-value of the test (numeric scalar).
  #
  # Fixes over the previous version: the column is `m`, not `m + 1` (which
  # skipped the first gene and ran out of bounds on the last one), and the
  # test compares expression between the classes instead of comparing the
  # expression values against the class codes themselves.
  fun_ttest <- function(m, data_genes, class) {
    stopifnot(
      length(m) == 1L,
      m >= 1,
      m <= ncol(data_genes),
      length(class) == nrow(data_genes)
    )

    groups <- unique(class[!is.na(class)])
    if (length(groups) != 2L) {
      stop("`class` must contain exactly two distinct labels", call. = FALSE)
    }

    gene_values <- data_genes[, m]
    # which() drops samples whose label is NA instead of propagating NA rows.
    result_test <- t.test(
      x = gene_values[which(class == groups[1])],
      y = gene_values[which(class == groups[2])],
      alternative = "two.sided",
      var.equal = TRUE
    )
    result_test$p.value
  }
  43.  
  # One p-value per gene column, computed by fun_ttest().
  list_pValue <- lapply(
    seq_len(ncol(data_genes)),
    function(i) fun_ttest(i, data_genes, class)
  )
  vec_pValue <- unlist(list_pValue)

  # Keep gene names and p-values in a data frame so the p-values stay numeric.
  # With cbind() both columns became character, which made order() and the
  # `< 0.05` filter compare strings (e.g. "1e-10" < "0.05" is FALSE).
  name_gene_pValue <- data.frame(
    gene = names(data_genes),
    p_value = vec_pValue,
    stringsAsFactors = FALSE
  )
  name_gene_pValue_sort <- name_gene_pValue[order(name_gene_pValue[, 2]), ]

  # Benjamini-Hochberg adjustment of the sorted p-values.
  name_gene_pValue_sort_adj <- data.frame(
    gene = name_gene_pValue_sort[, 1],
    p_adjusted = p.adjust(name_gene_pValue_sort[, 2], method = "BH"),
    stringsAsFactors = FALSE
  )

  # Genes whose adjusted p-value is below 0.05; which() drops NA p-values.
  var.imp <- name_gene_pValue_sort_adj[
    which(name_gene_pValue_sort_adj[, 2] < 0.05),
  ]
  dim(var.imp)
  55.  
  # Random forest (500 trees) for fold 1, using the first 100 gene columns.
  # The model is fitted on the fold's training rows and evaluated on its
  # held-out rows; previously xtest/ytest reused the training rows, so the
  # reported test error was measured on data the forest had already seen.
  # The factor is built from the full label vector before subsetting so the
  # training and test labels share the same levels.
  result_model <- randomForest::randomForest(
    x = data_genes[index_train[[1]], 1:100],
    y = as.factor(class)[index_train[[1]]],
    xtest = data_genes[index_test[[1]], 1:100],
    ytest = as.factor(class)[index_test[[1]]],
    ntree = 500,
    importance = TRUE,
    do.trace = TRUE
  )
  60.  
  61.  
  # Materialise the three folds as data frames of rows of `data0`, using the
  # row indices in `index_test` / `index_train` defined earlier in the script.
  # The previous code indexed `data0` with a single subscript (which selects
  # columns, not rows) and negated a list of data frames, which is an error.
  list_test <- lapply(index_test, function(rows) data0[rows, , drop = FALSE])
  list_train <- lapply(index_train, function(rows) data0[rows, , drop = FALSE])
  70.  
  71.  
  72.  
  # Stratified k-fold split of the rows of `data`.
  #
  # The rows are grouped by the label in the first column of `data`; within
  # each group the rows are shuffled and cut into `k` consecutive folds, so
  # every test fold holds a share of every class.
  #
  # Arguments:
  #   k    - number of folds (whole number >= 2).
  #   data - data frame or matrix whose first column holds the class label.
  # Returns: list(train.i, test.i). Each element is a list of length `k` of
  #   character vectors of row names (row numbers as strings when `data` has
  #   no row names). test.i[[f]] lists the held-out rows of fold f and
  #   train.i[[f]] all other rows, in the row order of `data`.
  #
  # The split is random (sample.int); call set.seed() first for
  # reproducibility.
  #
  # Fixes over the previous version: the fold count comes from the `k`
  # argument rather than an undefined global `k_cross`; the classes are taken
  # from the labels actually present instead of filtering both groups on
  # `== 0`; the sample size passed to sample.int() is a count, not a data
  # frame; and training rows are computed with setdiff(), so an empty test
  # fold no longer empties the training set (x[-integer(0), ] selects nothing).
  fx <- function(k, data) {
    stopifnot(
      is.numeric(k), length(k) == 1L, k >= 2, k == round(k),
      nrow(data) > 0L
    )

    class_labels <- data[, 1]
    stopifnot(!anyNA(class_labels))

    row_ids <- row.names(data)
    if (is.null(row_ids)) {
      row_ids <- as.character(seq_len(nrow(data)))
    }

    # Per class: shuffle the row names, then assign them to k folds.
    ids_by_class <- split(row_ids, class_labels, drop = TRUE)
    folds_by_class <- lapply(ids_by_class, function(ids) {
      shuffled <- ids[sample.int(length(ids))]
      fold_id <- cut(seq_along(shuffled), breaks = k, labels = FALSE)
      # Fixed levels keep a slot for folds that receive no row of this class.
      split(shuffled, factor(fold_id, levels = seq_len(k)))
    })

    # Fold f of the whole data set = fold f of every class, concatenated.
    test.i <- lapply(seq_len(k), function(fold) {
      as.character(unlist(lapply(folds_by_class, `[[`, fold), use.names = FALSE))
    })
    train.i <- lapply(test.i, function(held_out) setdiff(row_ids, held_out))

    list(train.i, test.i)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement