Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the leukemia data. The rest of the script expects the .RData file to
# define an object called `data` with a `Leukemia.class` column.
load("dataLeukemia.RData")

# Quick look at the first ten column names.
names(data[, 1:10])

# Turn the class label into a number. If Leukemia.class is stored as a factor,
# as.numeric() yields the integer level codes rather than the printed labels.
data$Leukemia.class <- as.numeric(data$Leukemia.class)

# Drop columns 1 and 3.
data <- data[, -c(1, 3)]

# Keep only the samples whose class code is 11 or 15.
dataSel <- data[data$Leukemia.class == 11 | data$Leukemia.class == 15, ]

# Number of samples per retained class.
table(dataSel$Leukemia.class)
# Hand-made 3-fold split over row positions 1..391.
# Each test fold is given as two row ranges; the matching training fold is
# every remaining position.
indexAll <- 1:391
indexTest <- list(
  c(1:66, 199:262),
  c(67:132, 263:327),
  c(133:198, 328:391)
)
indexTrain <- lapply(indexTest, function(heldOut) indexAll[-heldOut])
# Print the dimensions of the selected data.
dim(dataSel)

# Split dataSel by column: everything except the first column goes to
# dataGenes, and the first column becomes the class vector
# (presumably Leukemia.class -- confirm against the column layout).
dataGenes <- dataSel[, -1]
class <- dataSel[, 1]

# Earlier trial call, kept for reference:
# resultTest = tTest(x = c(dataGenes[, 1]), y = class, alternative = c("two.sided"), var.equal = TRUE)
# TODO: remove the redundant, unnecessary data.
# Two-sample t-test for one gene, comparing its values between two classes.
#
# Arguments:
#   m         - column index (or column name) of the gene inside `dataGenes`.
#   dataGenes - table with one row per sample and one column per gene.
#   class     - class label for every row of `dataGenes`; must contain exactly
#               two distinct values.
#
# Returns the p-value of the two-sided, equal-variance t-test.
# Stops with an error when `class` does not describe exactly two groups.
tTest <- function(m, dataGenes, class) {
  # Use the requested gene column (the index `m` was previously ignored).
  geneValues <- dataGenes[, m]

  # Compare the gene between the two classes instead of testing the gene
  # values against the class labels themselves.
  groups <- split(geneValues, class, drop = TRUE)
  if (length(groups) != 2L) {
    stop("tTest() needs exactly two classes, got ", length(groups),
         call. = FALSE)
  }

  resultTest <- t.test(
    x = groups[[1]],
    y = groups[[2]],
    alternative = "two.sided",
    var.equal = TRUE
  )
  resultTest$p.value
}
# Feature selection on the training rows of fold 3.
dataGenes0 <- dataGenes[indexTrain[[3]], ]
class0 <- class[indexTrain[[3]]]

# One t-test p-value per gene column. seq_len() keeps the loop empty when
# there are no columns (1:ncol() would iterate over c(1, 0)).
listpValue <- lapply(
  seq_len(ncol(dataGenes0)),
  function(i) tTest(i, dataGenes0, class0)
)
vecpValue <- unlist(listpValue)

# Pair each gene name with its p-value. A data frame keeps the p-values
# numeric; cbind() would coerce everything to character, so sorting and the
# 0.05 threshold would compare strings.
nameGenepValue <- data.frame(
  gene = names(dataGenes),
  pValue = vecpValue,
  stringsAsFactors = FALSE
)

# Sort genes by ascending p-value.
nameGenepValueSort <- nameGenepValue[order(nameGenepValue[, 2]), ]

# Same order, with Benjamini-Hochberg adjusted p-values in column 2.
nameGenepValueSortAdj <- data.frame(
  gene = nameGenepValueSort[, 1],
  pAdj = p.adjust(nameGenepValueSort[, 2], method = "BH"),
  stringsAsFactors = FALSE
)

# Keep the genes whose p-value is below 0.05; column 1 holds the gene names.
# NOTE(review): as before, the threshold is applied to the unadjusted
# p-values -- confirm whether the BH-adjusted values were intended.
varImp <- nameGenepValueSortAdj[which(nameGenepValueSort[, 2] < 0.05), ]
# Random forest on fold 3, using at most the first 100 genes listed in varImp.
# head() avoids NA column names when fewer than 100 genes were selected.
selectedGenes <- as.character(head(varImp[, 1], 100))
trainRows <- indexTrain[[3]]
testRows <- indexTest[[3]]

# Build the test labels on the training levels so both factors agree.
yTrain <- as.factor(class[trainRows])
yTest <- factor(class[testRows], levels = levels(yTrain))

# The gene selection belongs inside the [ ] as the column index;
# drop = FALSE keeps a table even when a single gene is selected.
resultModel <- randomForest::randomForest(
  x = dataGenes[trainRows, selectedGenes, drop = FALSE],
  y = yTrain,
  xtest = dataGenes[testRows, selectedGenes, drop = FALSE],
  ytest = yTest,
  ntree = 500,
  importance = TRUE
)

# Variable importance of the selected genes.
resultModel$importance
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement