Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
library(dplyr)
library(caret)
library(randomForest)

# Earlier sketch of a shuffled k-fold split, kept for reference:
# x<-sample.int(sizeData1, size= sizeData1, replace=FALSE)
# dataOneClass1<-dataOneClass1[index.permut1,]
# folds1 <-cut(seq(1,length(dataOneClass1[,1])),breaks=k_cross,labels=FALSE)

# Data loading ----
# The .RData file is expected to define an object called `data`.
load("dataLeukemia.RData")
dim(data)
data <- data[, -c(1, 3)]
# View() needs an interactive session; skip it when the script is sourced
# from Rscript or a batch job.
if (interactive()) View(data[1:50, 1:10])
table(data$Leukemia.class)

# Two-class subset ----
# Original note: c-ALL has 198 rows, MDS has 193 rows.
class_labels <- c("c-ALL/Pre-B-ALL without t(9;22)", "MDS")
# %in% never yields NA, so rows with a missing class are dropped instead of
# turning into all-NA rows.
data0 <- data[data$Leukemia.class %in% class_labels, ]
dim(data0)
# Encode the class as 1 (c-ALL) / 2 (MDS). Going through factor() with
# explicit levels works whether the column is stored as character or factor.
data0$Leukemia.class <- as.numeric(
  factor(data0$Leukemia.class, levels = class_labels)
)
print(data0$Leukemia.class)
if (interactive()) View(data0)

# Hand-built 3-fold split ----
index_all <- 1:391
# The fold ranges below are hard-coded for exactly 391 rows.
stopifnot(nrow(data0) == length(index_all))
# NOTE(review): the ranges look like they assume rows 1-198 are c-ALL and
# rows 199-391 are MDS -- confirm that data0 is ordered this way.
index_test <- list(
  c(1:66, 199:262),
  c(67:132, 263:327),
  c(133:198, 328:391)
)
index_train <- lapply(index_test, function(test_rows) index_all[-test_rows])

# Response and predictors ----
# NOTE(review): assumes Leukemia.class is the first column of data0 -- confirm.
class <- data0[, 1]
data_genes <- data0[, -1]
#' Two-sample t-test p-value for one gene
#'
#' Compares the expression values of column `m` between the two classes
#' found in `class`, using a two-sided, equal-variance t-test.
#'
#' @param m Column index of the gene in `data_genes`.
#' @param data_genes Data frame or matrix with one row per sample and one
#'   column per gene.
#' @param class Vector of class labels, one per row of `data_genes`; must
#'   contain exactly two distinct non-missing values.
#' @return The p-value of the test (a single number).
fun_ttest <- function(m, data_genes, class) {
  # Use column m itself: the loop index already runs over 1..ncol(data_genes),
  # so m + 1 would skip the first gene and fall off the end on the last one.
  expression <- as.numeric(data_genes[, m, drop = TRUE])

  groups <- unique(class[!is.na(class)])
  if (length(groups) != 2L) {
    stop("`class` must contain exactly two distinct labels.", call. = FALSE)
  }

  # Test expression in one class against expression in the other; the class
  # codes themselves are labels, not measurements to compare against.
  result <- t.test(
    x = expression[which(class == groups[1L])],
    y = expression[which(class == groups[2L])],
    alternative = "two.sided",
    var.equal = TRUE
  )
  result$p.value
}
# Per-gene p-values ----
# One t-test p-value per column of data_genes.
vec_pValue <- vapply(
  seq_len(ncol(data_genes)),
  function(i) fun_ttest(i, data_genes, class),
  numeric(1)
)
list_pValue <- as.list(vec_pValue)

# Keep names and p-values in a data frame. cbind() would coerce everything
# to character, and then both sorting and the `< 0.05` filter compare
# strings (e.g. "1e-10" < "0.05" is FALSE).
name_gene_pValue <- data.frame(
  gene = colnames(data_genes),
  p_value = vec_pValue,
  stringsAsFactors = FALSE
)
name_gene_pValue_sort <-
  name_gene_pValue[order(name_gene_pValue$p_value), , drop = FALSE]

# Benjamini-Hochberg adjustment of the sorted p-values.
name_gene_pValue_sort_adj <- data.frame(
  gene = name_gene_pValue_sort$gene,
  p_adjusted = p.adjust(name_gene_pValue_sort$p_value, method = "BH"),
  stringsAsFactors = FALSE
)

# Genes whose adjusted p-value is below 0.05; which() drops NA p-values and
# drop = FALSE keeps a data frame even when only one gene passes.
var.imp <- name_gene_pValue_sort_adj[
  which(name_gene_pValue_sort_adj$p_adjusted < 0.05), ,
  drop = FALSE
]
dim(var.imp)
# Random forest on fold 1 ----
# Fit on the training rows of the first fold and evaluate on that fold's
# held-out rows (xtest/ytest must not be the training rows, otherwise the
# reported test error is just the training error).
# NOTE(review): the predictors are the first 100 gene columns, not the genes
# selected in var.imp -- confirm this is intended.
gene_cols <- seq_len(min(100L, ncol(data_genes)))
# Build the factor once so the training and test responses share levels.
class_factor <- as.factor(class)
result_model <- randomForest::randomForest(
  x = data_genes[index_train[[1]], gene_cols, drop = FALSE],
  y = class_factor[index_train[[1]]],
  xtest = data_genes[index_test[[1]], gene_cols, drop = FALSE],
  ytest = class_factor[index_test[[1]]],
  ntree = 500,
  importance = TRUE,
  do.trace = TRUE
)
# Per-fold data subsets ----
# Row subsets of data0 for each of the three folds, built from the fold
# indices defined above. The trailing comma in data0[rows, ] matters:
# data0[rows] would select columns instead of samples.
list_test <- lapply(index_test, function(rows) data0[rows, , drop = FALSE])
list_train <- lapply(index_train, function(rows) data0[rows, , drop = FALSE])
#' Stratified k-fold train/test split
#'
#' Shuffles the rows of each class separately and deals them into `k` folds,
#' so every fold holds roughly the same share of each class.
#'
#' @param k Number of folds (a single whole number >= 1).
#' @param data Data frame or matrix whose first column holds the class label.
#' @return A list of two elements: first a list of `k` character vectors with
#'   the training row names per fold, then a list of `k` character vectors
#'   with the test row names per fold. Rows without row names are identified
#'   by their position.
fx <- function(k, data) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 1)
  k <- as.integer(k)

  labels <- data[, 1, drop = TRUE]
  if (anyNA(labels)) {
    stop("The class column (column 1) must not contain NA.", call. = FALSE)
  }

  n_rows <- nrow(data)
  row_ids <- rownames(data)
  if (is.null(row_ids)) {
    row_ids <- as.character(seq_len(n_rows))
  }

  # Assign a fold number to every row, one class at a time.
  fold_of_row <- integer(n_rows)
  for (rows in split(seq_len(n_rows), labels)) {
    shuffled <- rows[sample.int(length(rows))]
    # Dealing 1..k round-robin keeps per-class fold sizes within one of
    # each other and also works for k == 1 or classes smaller than k.
    fold_of_row[shuffled] <- rep_len(seq_len(k), length(shuffled))
  }

  folds <- seq_len(k)
  train.i <- lapply(folds, function(fold) row_ids[fold_of_row != fold])
  test.i <- lapply(folds, function(fold) row_ids[fold_of_row == fold])

  list(train.i, test.i)
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement