celestialgod

delete cols

Jul 30th, 2015
343
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 2.49 KB | None | 0 0
  # Benchmark fixture: a numRow x numCol matrix of standard-normal draws plus
  # two per-column vectors `a` and `b` in which roughly 10% of the entries are
  # replaced by NA.
  numCol <- 1500
  numRow <- 3000
  mat <- matrix(rnorm(numRow * numCol), nrow = numRow)
  a <- ifelse(runif(numCol) < 0.1, NA, rnorm(numCol))
  b <- ifelse(runif(numCol) < 0.1, NA, rnorm(numCol))
  6.  
  # Rescale the columns of `mat` with an explicit loop over its rows.
  #
  # Columns whose entry in `a` or `b` is NA are removed; every remaining
  # column j becomes (mat[, j] - a[j]) / b[j].
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   values to subtract, indexed in parallel with the columns of `mat`.
  #   b:   divisors, indexed in parallel with the columns of `mat`.
  #
  # Returns:
  #   A matrix with nrow(mat) rows and one column for each position where
  #   both `a` and `b` are non-NA.
  for_ver <- function(mat, a, b) {
    loc_nonNA <- which(!is.na(a) & !is.na(b))
    # drop = FALSE keeps a matrix even when a single column survives;
    # otherwise nrow() below would be NULL and the loop would fail.
    mat <- mat[, loc_nonNA, drop = FALSE]
    a <- a[loc_nonNA]
    b <- b[loc_nonNA]
    # seq_len() yields an empty loop for a zero-row matrix (1:0 would not).
    for (i in seq_len(nrow(mat))) {
      mat[i, ] <- (mat[i, ] - a) / b
    }
    mat
  }
  16.  
  # Rescale the columns of `mat` with two sweep() passes.
  #
  # Columns whose entry in `a` or `b` is NA are removed; `a` is then swept
  # out of the remaining columns by subtraction and `b` by division.
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   values to subtract, indexed in parallel with the columns of `mat`.
  #   b:   divisors, indexed in parallel with the columns of `mat`.
  #
  # Returns:
  #   A matrix with nrow(mat) rows and one column for each position where
  #   both `a` and `b` are non-NA.
  sweep_ver <- function(mat, a, b) {
    loc_nonNA <- which(!is.na(a) & !is.na(b))
    # drop = FALSE: sweep() needs a dim attribute, which plain `[` discards
    # when only one column is selected.
    mat <- mat[, loc_nonNA, drop = FALSE]
    a <- a[loc_nonNA]
    b <- b[loc_nonNA]
    mat <- sweep(mat, 2, a, "-")
    sweep(mat, 2, b, "/")
  }
  26.  
  # Rescale the columns of `mat` through a single scale() call.
  #
  # Only the columns at positions where neither `a` nor `b` is NA are passed
  # on; `a` is used as the centre and `b` as the scale for those columns.
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   centring values, indexed in parallel with the columns of `mat`.
  #   b:   scaling values, indexed in parallel with the columns of `mat`.
  #
  # Returns:
  #   The value of scale() for the retained columns.
  scale_ver <- function(mat, a, b) {
    usable <- which(!(is.na(a) | is.na(b)))
    scale(mat[, usable], center = a[usable], scale = b[usable])
  }
  35.  
  # Rescale the columns of `mat` with sweep(), dropping columns via na.omit().
  #
  # `a` and `b` are stacked under `mat` as two extra rows, the stack is
  # transposed, and na.omit() removes every row of the transpose (i.e. every
  # original column) that contains an NA. Because na.omit() inspects the
  # whole stack, a column with an NA inside `mat` itself is removed as well.
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   values to subtract, one per column of `mat`.
  #   b:   divisors, one per column of `mat`.
  #
  # Returns:
  #   A matrix with nrow(mat) rows holding (column - a) / b for every column
  #   that survived na.omit().
  sweep_ver2 <- function(mat, a, b) {
    stacked <- t(na.omit(t(rbind(mat, a, b))))
    n_stacked <- nrow(stacked)
    a <- stacked[n_stacked - 1L, ]
    b <- stacked[n_stacked, ]
    # seq_len() + drop = FALSE keep a proper matrix for one-row input, for a
    # single surviving column, and for zero-row input (where 1:0 would
    # wrongly pick up the `a` row).
    mat <- stacked[seq_len(n_stacked - 2L), , drop = FALSE]
    mat <- sweep(mat, 2, a, "-")
    sweep(mat, 2, b, "/")
  }
  45.  
  # Rescale the columns of `mat` with scale(), dropping columns via na.omit().
  #
  # `a` and `b` are stacked under `mat` as two extra rows, the stack is
  # transposed, and na.omit() removes every row of the transpose (i.e. every
  # original column) that contains an NA. Because na.omit() inspects the
  # whole stack, a column with an NA inside `mat` itself is removed as well.
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   centring values, one per column of `mat`.
  #   b:   scaling values, one per column of `mat`.
  #
  # Returns:
  #   The value of scale() for the columns that survived na.omit().
  scale_ver2 <- function(mat, a, b) {
    stacked <- t(na.omit(t(rbind(mat, a, b))))
    n_stacked <- nrow(stacked)
    a <- stacked[n_stacked - 1L, ]
    b <- stacked[n_stacked, ]
    # seq_len() + drop = FALSE keep a proper matrix; without them a one-row
    # input collapses to a vector whose length no longer matches `center`.
    mat <- stacked[seq_len(n_stacked - 2L), , drop = FALSE]
    scale(mat, center = a, scale = b)
  }
  54.  
  # Rescale the columns of `mat` by applying the transformation row by row.
  #
  # For every row, the entries at positions where neither `a` nor `b` is NA
  # are transformed as (x - a) / b.
  #
  # Args:
  #   mat: matrix to rescale.
  #   a:   values to subtract, indexed in parallel with the columns of `mat`.
  #   b:   divisors, indexed in parallel with the columns of `mat`.
  #
  # Returns:
  #   A matrix with nrow(mat) rows and one column for each retained position;
  #   row names of `mat` and the retained column names are carried over.
  apply_ver <- function(mat, a, b) {
    loc_nonNA <- which(!is.na(a) & !is.na(b))
    n_keep <- length(loc_nonNA)
    if (n_keep == 0L) {
      # Nothing to rescale; apply() would hand back a list here.
      return(matrix(numeric(0), nrow = nrow(mat), ncol = 0L,
                    dimnames = list(rownames(mat), NULL)))
    }
    # Subset the per-column vectors once instead of once per row.
    a_keep <- a[loc_nonNA]
    b_keep <- b[loc_nonNA]
    per_row <- apply(mat, 1, function(x) (x[loc_nonNA] - a_keep) / b_keep)
    # apply() stores each row's result contiguously, but collapses to a plain
    # vector when only one column is kept, so t() alone would return a
    # 1 x nrow matrix. Rebuild the shape explicitly instead.
    matrix(per_row, nrow = nrow(mat), ncol = n_keep, byrow = TRUE,
           dimnames = list(rownames(mat), colnames(mat)[loc_nonNA]))
  }
  59.  
  60. library(data.table)
  61. library(dplyr)
  # data.table copy of the benchmark matrix, used as input to data_table_ver().
  DT <- data.table(mat)
  # Rescale the columns of `DT` with a dplyr pipeline ending in scale().
  #
  # Columns at positions where `a` or `b` is NA are deselected; the remaining
  # columns are centred by `a` and scaled by `b`.
  #
  # Args:
  #   DT: table whose columns are to be rescaled.
  #   a:  centring values, indexed in parallel with the columns of `DT`.
  #   b:  scaling values, indexed in parallel with the columns of `DT`.
  #
  # Returns:
  #   The value of scale() for the selected columns.
  data_table_ver <- function(DT, a, b) {
    loc_nonNA <- which(!is.na(a) & !is.na(b))
    DT %>%
      # all_of() marks loc_nonNA as an external vector of positions, so it can
      # never be mistaken for a column called "loc_nonNA" and does not trigger
      # tidyselect's bare-external-vector deprecation.
      select(all_of(loc_nonNA)) %>%
      scale(center = a[loc_nonNA], scale = b[loc_nonNA])
  }
  68.  
  69. library(rbenchmark)
  # Time every implementation on the same inputs (20 replications each) and
  # list them ordered by time relative to the fastest.
  benchmark(
    for_ver(mat, a, b),
    sweep_ver(mat, a, b),
    scale_ver(mat, a, b),
    sweep_ver2(mat, a, b),
    scale_ver2(mat, a, b),
    apply_ver(mat, a, b),
    data_table_ver(DT, a, b),
    columns = c("test", "replications", "elapsed", "relative"),
    order = "relative",
    replications = 20
  )
  75. #                       test replications elapsed relative
  76. # 1       for_ver(mat, a, b)           20    3.03    1.000
  77. # 6     apply_ver(mat, a, b)           20    3.80    1.254
  78. # 3     scale_ver(mat, a, b)           20    5.10    1.683
  79. # 2     sweep_ver(mat, a, b)           20    5.18    1.710
  80. # 7 data_table_ver(DT, a, b)           20    6.04    1.993
  81. # 4    sweep_ver2(mat, a, b)           20    9.75    3.218
  82. # 5    scale_ver2(mat, a, b)           20   10.07    3.323
  83.  
  # Check that every implementation reproduces the for_ver() result.
  # all.equal() compares only its first two arguments; further positional
  # arguments are matched to options such as `tolerance`, so each candidate
  # has to be compared against the reference in its own call.
  # local() keeps the helper variables out of the global environment.
  local({
    reference <- for_ver(mat, a, b)
    candidates <- list(
      scale_ver = scale_ver(mat, a, b),
      sweep_ver = sweep_ver(mat, a, b),
      scale_ver2 = scale_ver2(mat, a, b),
      sweep_ver2 = sweep_ver2(mat, a, b),
      data_table_ver = data_table_ver(DT, a, b),
      apply_ver = apply_ver(mat, a, b)
    )
    all(vapply(
      candidates,
      function(res) isTRUE(all.equal(reference, res, check.attributes = FALSE)),
      logical(1)
    ))
  }) # expected: TRUE
Advertisement
Add Comment
Please, Sign In to add comment