Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Show the element names of Y1.ls. Y1.ls is created before this excerpt;
# presumably it holds one element per class -- TODO confirm.
names(Y1.ls)
# Code starts here.
# ---- Helper functions ----

# Split a vector by its own values.
#
# x: an atomic vector (used in this script with the metric names).
# Returns a named list with one element per distinct value of `x`. Each
# element holds the occurrences of that value. The list is ordered by the
# sorted distinct values (the levels of split()'s grouping factor), not by
# the input order. Useful with lapply(): the result is named by value.
splitchar <- function(x) {
  split(x, x)
}
# Compute every metric in `perf.metrics` for one classification result.
#
# class.o: the value returned by classification(), possibly wrapped in try().
#          When the wrapped call failed, class.o is a "try-error" object.
# Returns a named list with one entry per metric (names as produced by
# splitchar(perf.metrics)). Each entry is the `score` slot of the object
# returned by evaluation() (presumably CMA::evaluation -- the library() call
# is not part of this excerpt). For a failed run every entry is NA_real_,
# so later aggregation with na.rm = TRUE can skip it instead of the whole
# simulation aborting inside evaluation().
class.summary.f <- function(class.o) {
  failed <- inherits(class.o, "try-error")
  lapply(splitchar(perf.metrics), function(metr) {
    if (failed) {
      return(NA_real_)
    }
    # metr is "misclassification", "sensitivity", ...
    evaluation(class.o, measure = metr)@score
  })
}
# Performance metrics computed for every classifier.
perf.metrics <- c("misclassification", "sensitivity", "specificity")

# Fix the RNG seed so the random sampling that follows is reproducible.
set.seed(123)
# Run 50 resampling iterations. Each iteration
#   1. draws a "simulated" data set with the desired class sizes,
#   2. builds one stratified bootstrap learning set,
#   3. selects genes with a t-test,
#   4. fits DLDA with the top 20 and the top 50 genes and scores both fits.
# Result: a list of length 50. Each element is a named list
# (dldaCMA_NG20, dldaCMA_NG50) holding the output of class.summary.f().
# X1, Y1, Y1.ls and Y.size.ls are defined outside this excerpt.
results.of.50.iteration <- lapply(seq_len(50), function(i) {
  # For each element of Y1.ls draw s1 entries with replacement.
  # SIMPLIFY = FALSE always returns a list, even when all sizes are equal.
  # NOTE(review): sample() treats a length-one numeric v1 as 1:v1 -- confirm
  # that every element of Y1.ls holds more than one index.
  Y.ls <- mapply(
    function(v1, s1) sample(v1, s1, replace = TRUE),
    Y1.ls, Y.size.ls,
    SIMPLIFY = FALSE
  )
  idx <- unlist(Y.ls)

  # "Simulated" subset of the original data with the desired class sizes.
  X <- X1[idx, ]
  Y <- Y1[idx]

  lset <- GenerateLearningsets(y = Y, method = "bootstrap", niter = 1,
                               strat = TRUE)

  # ---- t-test as preliminary variable selection ----
  selttest <- GeneSelection(X, Y, learningsets = lset, method = "t.test",
                            scheme = "one-vs-all")

  # ---- DLDA on the t-test selected genes ----
  # try() turns a failing fit into a "try-error" object; that object is
  # handed to class.summary.f() unchanged.
  run.dlda <- function(nbgene) {
    class.summary.f(try(
      classification(X, Y, learningsets = lset, genesel = selttest,
                     classifier = dldaCMA, nbgene = nbgene)
    ))
  }

  # A named list is more flexible than a vector with a manual counter;
  # the names identify the classifier in the melted data frame.
  list(
    dldaCMA_NG20 = run.dlda(20),
    dldaCMA_NG50 = run.dlda(50)
  )
})
# Flatten the nested result list into a long data frame with melt()
# (presumably reshape2 -- the library() call is outside this excerpt).
# As the sample output shows, L1 is the iteration index, L2 the classifier
# name and L3 the metric name.
results.of.50.iteration.df <- melt(results.of.50.iteration)
head(results.of.50.iteration.df, 10)
# value L3 L2 L1
# 1 0.00000000 misclassification dldaCMA_NG20 1
# 2 1.00000000 sensitivity dldaCMA_NG20 1
# 3 1.00000000 specificity dldaCMA_NG20 1
# 4 0.02941176 misclassification dldaCMA_NG50 1
# 5 1.00000000 sensitivity dldaCMA_NG50 1
# 6 0.66666667 specificity dldaCMA_NG50 1
# 7 0.10810811 misclassification dldaCMA_NG20 2
# 8 1.00000000 sensitivity dldaCMA_NG20 2
# 9 0.20000000 specificity dldaCMA_NG20 2
# 10 0.10810811 misclassification dldaCMA_NG50 2
# One value per L3 + L2 + L1 combination because there is only one
# cross-validation iteration per sampling iteration.
# The long frame can be aggregated further.
# Average over the sampling iterations: one row per classifier (L2) and one
# column per metric (L3: misclassification, sensitivity, specificity).
# na.rm = TRUE is forwarded to mean() and ignores NA scores (intended for
# failed classifier runs).
results.of.50.iteration.df.M <- dcast(
  results.of.50.iteration.df, L2 ~ L3,
  fun.aggregate = mean, na.rm = TRUE
)
head(results.of.50.iteration.df.M)
# L2 misclassification sensitivity specificity
# 1 dldaCMA_NG20 0.07225663 0.9790976 0.4363333
# 2 dldaCMA_NG50 0.07926080 0.9657780 0.4970000
# Standard deviation across the sampling iterations, same layout as the
# table of means (rows: classifier L2, columns: metric L3).
dcast(
  results.of.50.iteration.df, L2 ~ L3,
  fun.aggregate = sd, na.rm = TRUE
)
# L2 misclassification sensitivity specificity
# 1 dldaCMA_NG20 0.04422964 0.02726753 0.3307672
# 2 dldaCMA_NG50 0.04619228 0.03935401 0.3115991
# A more flexible way to compute several aggregates across the sampling
# iterations: ddply() computes mean and sd per classifier (L2) and metric
# (L3), then melt() stacks the two statistics into `variable` / `value`.
results.of.50.iteration.df.aggr <- melt(
  ddply(
    results.of.50.iteration.df, .(L2, L3),
    function(x) {
      c(mean = mean(x$value, na.rm = TRUE), sd = sd(x$value, na.rm = TRUE))
    }
  ),
  measure.vars = c("mean", "sd")
)
# results.of.50.iteration.df.aggr
# L2 L3 variable value
# 1 dldaCMA_NG20 misclassification mean 0.07225663
# 2 dldaCMA_NG20 sensitivity mean 0.97909763
# 3 dldaCMA_NG20 specificity mean 0.43633333
# 4 dldaCMA_NG50 misclassification mean 0.07926080
# 5 dldaCMA_NG50 sensitivity mean 0.96577800
# 6 dldaCMA_NG50 specificity mean 0.49700000
# 7 dldaCMA_NG20 misclassification sd 0.04422964
# 8 dldaCMA_NG20 sensitivity sd 0.02726753
# 9 dldaCMA_NG20 specificity sd 0.33076716
# 10 dldaCMA_NG50 misclassification sd 0.04619228
# 11 dldaCMA_NG50 sensitivity sd 0.03935401
# 12 dldaCMA_NG50 specificity sd 0.31159909
# Final table: the mean of every performance metric plus the standard
# deviation of the misclassification rate, one row per classifier.
# Each L2 / L3 / variable cell holds a single value, so mean() only passes
# it through.
# NOTE: this assignment replaces the raw per-iteration list stored under the
# same name with the summary data frame.
results.of.50.iteration <- dcast(
  subset(
    results.of.50.iteration.df.aggr,
    variable == "mean" | (variable == "sd" & L3 == "misclassification")
  ),
  L2 ~ L3 + variable,
  fun.aggregate = mean
)
results.of.50.iteration
# L2 misclassification_mean misclassification_sd sensitivity_mean
# 1 dldaCMA_NG20 0.07225663 0.04422964 0.9790976
# 2 dldaCMA_NG50 0.07926080 0.04619228 0.9657780
# specificity_mean
# 1 0.4363333
# 2 0.4970000
# Save the summary table as CSV; NA values are written as empty cells.
write.csv(
  results.of.50.iteration,
  na = "",
  file = "E:/bioconductor/results.csv"
)
Add Comment
Please, Sign In to add comment