names(Y1.ls)
#here my code starts:
#packages used below: CMA (Bioconductor) for classification, reshape2 and plyr for reshaping/aggregation
library(CMA)
library(reshape2)
library(plyr)
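## Hypothetical sketch (not part of the original script) of the inputs assumed below:
## X1 = samples-by-genes matrix, Y1 = class labels, Y1.ls = row indices split by class,
## Y.size.ls = desired per-class sample sizes. Names and sizes are illustrative only;
## skip this block if you already have these objects from your own data.
if (!exists("X1")) {
  X1        <- matrix(rnorm(60 * 200), nrow = 60)  # 60 samples, 200 genes (toy data)
  Y1        <- factor(rep(c("A", "B"), each = 30)) # binary class labels
  Y1.ls     <- split(seq_along(Y1), Y1)            # row indices per class
  Y.size.ls <- list(A = 30, B = 8)                 # desired class sizes per sampling iteration
}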
#some helper functions
splitchar <- function(x){split(x,x)} #turns a character vector into a named list, so metric names survive lapply
class.summary.f <- function(class.o){
  lapply(splitchar(perf.metrics),function(metr) evaluation(class.o,measure=metr)@score ) #metr is one of the performance metrics (misclassification rate and so on)
}

#performance metrics to use:
perf.metrics <- c("misclassification", "sensitivity", "specificity")
set.seed(123)
results.of.50.iteration <- lapply(1:50,function(i){
  Y.ls <- mapply(function(v1,s1){
    sample(v1,s1,replace=T) #sample indices with replacement
  },Y1.ls,Y.size.ls)

  X <- X1[unlist(Y.ls),] #create a "simulated" (desired class sizes) subset of the original data
  Y <- Y1[unlist(Y.ls)]

  lset <- GenerateLearningsets(y = Y, method = "bootstrap", niter = 1, strat = TRUE)

  y <- Y

  ### classification code below here:

  simurealCV <- list() #a list is more flexible than a vector, no 'j' counter needed

  ####################################################
  ##### t-test as preliminary variable selection #####
  ####################################################

  selttest <- GeneSelection(X, y, learningsets = lset, method = "t.test", scheme = "one-vs-all")

  ### dlda + t-test gene selection ###

  #j<-1 not needed, no counter required
  # classifier 1
  simurealCV <- c(simurealCV,list('dldaCMA_NG20' = class.summary.f(try(
    #you don't have to name the classifier; list(class.summary.f(... will work too
    classification(X, y, learningsets = lset,
                   genesel = selttest, classifier = dldaCMA, nbgene = 20)
  )
  )))

  #next classifier
  simurealCV <- c(simurealCV,list('dldaCMA_NG50' = class.summary.f(try(
    classification(X, y, learningsets = lset,
                   genesel = selttest, classifier = dldaCMA, nbgene = 50)
  )
  )))

  simurealCV
})
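## The object built above is a nested list: 50 sampling iterations at the top level,
## each holding a named list of classifiers, each holding one value per performance
## metric. reshape2::melt() flattens that nesting into the long data frame below
## (L1 = sampling iteration, L2 = classifier, L3 = metric), which matches the
## example output shown under head() further down.
# str(results.of.50.iteration[[1]], max.level = 2)  # optional: inspect the nesting of one iteration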

results.of.50.iteration.df <- melt(results.of.50.iteration)
head(results.of.50.iteration.df,10)
#        value                L3           L2 L1
#1  0.00000000 misclassification dldaCMA_NG20  1
#2  1.00000000       sensitivity dldaCMA_NG20  1
#3  1.00000000       specificity dldaCMA_NG20  1
#4  0.02941176 misclassification dldaCMA_NG50  1
#5  1.00000000       sensitivity dldaCMA_NG50  1
#6  0.66666667       specificity dldaCMA_NG50  1
#7  0.10810811 misclassification dldaCMA_NG20  2
#8  1.00000000       sensitivity dldaCMA_NG20  2
#9  0.20000000       specificity dldaCMA_NG20  2
#10 0.10810811 misclassification dldaCMA_NG50  2

# one value per L3+L2+L1 combination because there is only 1 cross-validation iteration
#this frame can be aggregated further
#to average across sampling iterations, i.e. get a data frame with columns for misclassification mean (M), SD, sensitivity and specificity
#by classifier
results.of.50.iteration.df.M <- dcast(results.of.50.iteration.df,L2~L3,fun.aggregate=mean,na.rm=T) #na.rm removes NAs, i.e. failed classifier runs
head(results.of.50.iteration.df.M)
#            L2 misclassification sensitivity specificity
#1 dldaCMA_NG20        0.07225663   0.9790976   0.4363333
#2 dldaCMA_NG50        0.07926080   0.9657780   0.4970000

#to calculate the standard deviation across sampling iterations
dcast(results.of.50.iteration.df,L2~L3,fun.aggregate=sd,na.rm=T)
#            L2 misclassification sensitivity specificity
#1 dldaCMA_NG20        0.04422964  0.02726753   0.3307672
#2 dldaCMA_NG50        0.04619228  0.03935401   0.3115991

#another, more flexible way to get various aggregations across sampling iterations
results.of.50.iteration.df.aggr <- melt(
  ddply(results.of.50.iteration.df,.(L2,L3),function(x) c(mean=mean(x$value,na.rm=T),sd=sd(x$value,na.rm=T))),
  measure.vars=c('mean','sd'))
#results.of.50.iteration.df.aggr
#             L2                L3 variable      value
#1  dldaCMA_NG20 misclassification     mean 0.07225663
#2  dldaCMA_NG20       sensitivity     mean 0.97909763
#3  dldaCMA_NG20       specificity     mean 0.43633333
#4  dldaCMA_NG50 misclassification     mean 0.07926080
#5  dldaCMA_NG50       sensitivity     mean 0.96577800
#6  dldaCMA_NG50       specificity     mean 0.49700000
#7  dldaCMA_NG20 misclassification       sd 0.04422964
#8  dldaCMA_NG20       sensitivity       sd 0.02726753
#9  dldaCMA_NG20       specificity       sd 0.33076716
#10 dldaCMA_NG50 misclassification       sd 0.04619228
#11 dldaCMA_NG50       sensitivity       sd 0.03935401
#12 dldaCMA_NG50       specificity       sd 0.31159909


#to get a table with means for all the performance metrics, plus the standard deviation for the misclassification rate
results.of.50.iteration <- dcast(subset(results.of.50.iteration.df.aggr,variable=='mean'|(variable=='sd'&L3=='misclassification')),
                                 L2~L3+variable,fun.aggregate=mean)
results.of.50.iteration
#            L2 misclassification_mean misclassification_sd sensitivity_mean
#1 dldaCMA_NG20             0.07225663            0.04422964        0.9790976
#2 dldaCMA_NG50             0.07926080            0.04619228        0.9657780
#  specificity_mean
#1        0.4363333
#2        0.4970000


write.csv(results.of.50.iteration, na="", #NA will be empty cells
          file = "E:/bioconductor/results.csv")