SHARE
TWEET

Untitled

a guest Jul 17th, 2019 53 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. > fibo <- function(n) {
  2. +   if ( n < 2 ) n
  3. +   else fibo(n-1) + fibo(n-2)
  4. + }
  5. > system.time(for(i in 0:26) fibo(i))
  6.    user  system elapsed
  7.    7.48    0.00    7.52
  8. > system.time(sapply(0:26, fibo))
  9.    user  system elapsed
  10.    7.50    0.00    7.54
  11. > system.time(lapply(0:26, fibo))
  12.    user  system elapsed
  13.    7.48    0.04    7.54
  14. > library(plyr)
  15. > system.time(ldply(0:26, fibo))
  16.    user  system elapsed
  17.    7.52    0.00    7.58
  18.      
  19. library(snow)
  20. cl <- makeSOCKcluster(c("localhost","localhost"))
  21. parSapply(cl, 1:20, get("+"), 3)
  22.      
  23. parLapply(cl, x, fun, ...)
  24. parSapply(cl, X, FUN, ..., simplify = TRUE, USE.NAMES = TRUE)
  25. parApply(cl, X, MARGIN, FUN, ...)
  26. parRapply(cl, x, fun, ...)
  27. parCapply(cl, x, fun, ...)
  28.      
  29. > df <- 1:10
  30. > # *apply example
  31. > lapply(2:3, function(i) df <- df * i)
  32. > df
  33.  [1]  1  2  3  4  5  6  7  8  9 10
  34. > # for loop example
  35. > for(i in 2:3) df <- df * i
  36. > df
  37.  [1]  6 12 18 24 30 36 42 48 54 60
  38.      
  39. set.seed(1)  #for reproducability of the results
  40.  
  41. # The data
  42. X <- rnorm(100000)
  43. Y <- as.factor(sample(letters[1:5],100000,replace=T))
  44. Z <- as.factor(sample(letters[1:10],100000,replace=T))
  45.  
  46. # the function forloop that averages X over every combination of Y and Z
  47. forloop <- function(x,y,z){
  48. # These ones are for optimization, so the functions
  49. #levels() and length() don't have to be called more than once.
  50.   ylev <- levels(y)
  51.   zlev <- levels(z)
  52.   n <- length(ylev)
  53.   p <- length(zlev)
  54.  
  55.   out <- matrix(NA,ncol=p,nrow=n)
  56.   for(i in 1:n){
  57.       for(j in 1:p){
  58.           out[i,j] <- (mean(x[y==ylev[i] & z==zlev[j]]))
  59.       }
  60.   }
  61.   rownames(out) <- ylev
  62.   colnames(out) <- zlev
  63.   return(out)
  64. }
  65.  
  66. # Used on the generated data
  67. forloop(X,Y,Z)
  68.  
  69. # The same using tapply
  70. tapply(X,list(Y,Z),mean)
  71.      
  72. > system.time(forloop(X,Y,Z))
  73.    user  system elapsed
  74.    0.94    0.02    0.95
  75.  
  76. > system.time(tapply(X,list(Y,Z),mean))
  77.    user  system elapsed
  78.    0.06    0.00    0.06
  79.      
  80. > system.time({z <- numeric(1e6); for(i in y) z[i] <- foo(i)})
  81.    user  system elapsed
  82.    3.54    0.00    3.53
  83. > system.time(z <- lapply(y, foo))
  84.    user  system elapsed
  85.    2.89    0.00    2.91
  86. > system.time(z <- vapply(y, foo, numeric(1)))
  87.    user  system elapsed
  88.    1.35    0.00    1.36
  89.      
  90. foo <- function(x) {
  91.    x <- x+1
  92.  }
  93. y <- numeric(1e6)
  94. system.time({z <- numeric(1e6); for(i in y) z[i] <- foo(i)})
  95. #   user  system elapsed
  96. #  4.967   0.049   7.293
  97. system.time(z <- sapply(y, foo))
  98. #   user  system elapsed
  99. #  5.256   0.134   7.965
  100. system.time(z <- lapply(y, foo))
  101. #   user  system elapsed
  102. #  2.179   0.126   3.301
  103.      
  104. set.seed(1)  #for reproducability of the results
  105.  
  106. # The data - copied from Joris Meys answer
  107. X <- rnorm(100000)
  108. Y <- as.factor(sample(letters[1:5],100000,replace=T))
  109. Z <- as.factor(sample(letters[1:10],100000,replace=T))
  110.  
  111. # an R way to generate tapply functionality that is fast and
  112. # shows more general principles about fast R coding
  113. YZ <- interaction(Y, Z)
  114. XS <- split(X, YZ)
  115. m <- vapply(XS, mean, numeric(1))
  116. m <- matrix(m, nrow = length(levels(Y)))
  117. rownames(m) <- levels(Y)
  118. colnames(m) <- levels(Z)
  119. m
  120.      
  121. df <- data.frame(id = rep(letters[1:10], 100000),
  122.                  value = rnorm(1000000))
  123.  
  124. f1 <- function(x)
  125.   tapply(x$value, x$id, sum)
  126.  
  127. f2 <- function(x){
  128.   res <- 0
  129.   for(i in seq_along(l <- unique(x$id)))
  130.     res[i] <- sum(x$value[x$id == l[i]])
  131.   names(res) <- l
  132.   res
  133. }            
  134.  
  135. library(microbenchmark)
  136.  
  137. > microbenchmark(f1(df), f2(df), times=100)
  138. Unit: milliseconds
  139.    expr      min       lq   median       uq      max neval
  140.  f1(df) 28.02612 28.28589 28.46822 29.20458 32.54656   100
  141.  f2(df) 38.02241 41.42277 41.80008 42.05954 45.94273   100
  142.      
  143. mat <- matrix(rnorm(1000000), nrow=1000)
  144.  
  145. f3 <- function(x)
  146.   apply(x, 2, sum)
  147.  
  148. f4 <- function(x){
  149.   res <- 0
  150.   for(i in 1:ncol(x))
  151.     res[i] <- sum(x[,i])
  152.   res
  153. }
  154.  
  155. > microbenchmark(f3(mat), f4(mat), times=100)
  156. Unit: milliseconds
  157.     expr      min       lq   median       uq      max neval
  158.  f3(mat) 14.87594 15.44183 15.87897 17.93040 19.14975   100
  159.  f4(mat) 12.01614 12.19718 12.40003 15.00919 40.59100   100
  160.      
  161. f5 <- function(x)
  162.   colSums(x)
  163.  
  164. > microbenchmark(f5(mat), times=100)
  165. Unit: milliseconds
  166.     expr      min       lq   median       uq      max neval
  167.  f5(mat) 1.362388 1.405203 1.413702 1.434388 1.992909   100
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top