Guest User

Untitled

a guest
Oct 22nd, 2018
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.98 KB | None | 0 0
  # Simulate N rows. Z holds 1, 2 or 3 and selects which of Y_1, Y_2, Y_3
  # is copied into Y for that row.
  N <- 100

  df <- data.frame(
    Z = sample(1:3, size = N, replace = TRUE),
    Y_1 = rnorm(n = N),
    Y_2 = rnorm(n = N),
    Y_3 = rnorm(n = N)
  )

  # an annoying solution: start from an all-NA column and fill it one
  # value of Z at a time
  df$Y <- rep(NA, nrow(df))
  df$Y[df$Z == 1] <- df$Y_1[df$Z == 1]
  df$Y[df$Z == 2] <- df$Y_2[df$Z == 2]
  df$Y[df$Z == 3] <- df$Y_3[df$Z == 3]

  head(df)
  19.  
  20. Z Y_1 Y_2 Y_3 Y
  21. 1 3 0.89124772 1.4377700 0.05226285 0.05226285
  22. 2 1 0.89186873 -0.6984839 -0.86141525 0.89186873
  23. 3 1 -0.01315678 1.5193461 0.18290065 -0.01315678
  24. 4 3 -0.57857274 -1.4445197 2.03764943 2.03764943
  25. 5 3 -0.19793692 -0.1818225 1.10270877 1.10270877
  26. 6 2 1.48291431 2.7264541 0.70129357 2.72645413
  27.  
  # For every row, pick the value in column Y_<Z>. The three Y_* columns are
  # selected by name and indexed with a (row, column) matrix, so the lookup
  # does not depend on Z being the first column or on every column of df
  # being numeric (apply() would coerce the whole data frame to one matrix).
  df$Y <- as.matrix(df[c("Y_1", "Y_2", "Y_3")])[
    cbind(seq_len(nrow(df)), df$Z)
  ]
  head(df)
  30. # Z Y_1 Y_2 Y_3 Y
  31. #1 1 -0.8598997 -0.3180947 1.9374462 -0.8598997
  32. #2 2 -0.2392902 0.2266245 0.2364991 0.2266245
  33. #3 1 -0.8733609 -1.3892361 0.3351359 -0.8733609
  34. #4 3 -0.6533548 -1.1042993 -0.2906852 -0.2906852
  35. #5 1 -1.7424126 -0.2101860 0.1198945 -1.7424126
  36. #6 2 -1.9746651 -0.4308746 -0.7849773 -0.4308746
  37.  
  # Return the Y_<Z> value of a single-row data frame.
  #
  # dfrow: a one-row data frame; its first four columns are read and are
  #        expected to be Z, Y_1, Y_2, Y_3 in that order.
  # Returns a length-one vector named after the selected column.
  get_result <- function(dfrow) {
    x <- unlist(dfrow[, 1:4])
    # x[1] is Z, so position Z + 1 holds the matching Y_<Z> entry. The value
    # is the last expression (not an assignment) so it is returned visibly.
    x[x[1] + 1]
  }
  42.  
  library(purrr)
  # by_row() is not exported by current purrr (the row-wise data frame mappers
  # moved to the purrrlyr package), so build the per-row results with map().
  # Each row is handed to get_result() as a one-row data frame and the results
  # are stored in a list column named .out, the name by_row() used by default.
  newdf <- df
  newdf$.out <- map(seq_len(nrow(df)), function(i) get_result(df[i, ]))
  45.  
  # Matrix indexing: after dropping the first column (Z), row i is paired
  # with column Z[i]. seq_len() keeps the index empty for a zero-row df,
  # where 1:nrow(df) would count down as c(1, 0).
  df$Y <- df[-1][cbind(seq_len(nrow(df)), df$Z)]
  df
  48. # Z Y_1 Y_2 Y_3 Y
  49. #1 3 0.89124772 1.4377700 0.05226285 0.05226285
  50. #2 1 0.89186873 -0.6984839 -0.86141525 0.89186873
  51. #3 1 -0.01315678 1.5193461 0.18290065 -0.01315678
  52. #4 3 -0.57857274 -1.4445197 2.03764943 2.03764943
  53. #5 3 -0.19793692 -0.1818225 1.10270877 1.10270877
  54. #6 2 1.48291431 2.7264541 0.70129357 2.72645410
  55.  
  # Six-row sample data set, written out column by column. Row names are
  # kept as the character labels "1" to "6".
  df <- data.frame(
    Z = c(3L, 1L, 1L, 3L, 3L, 2L),
    Y_1 = c(
      0.89124772, 0.89186873, -0.01315678,
      -0.57857274, -0.19793692, 1.48291431
    ),
    Y_2 = c(
      1.43777, -0.6984839, 1.5193461,
      -1.4445197, -0.1818225, 2.7264541
    ),
    Y_3 = c(
      0.05226285, -0.86141525, 0.18290065,
      2.03764943, 1.10270877, 0.70129357
    ),
    row.names = as.character(1:6)
  )
  62.  
  # Larger simulated data set for the timing comparison. Z is drawn first,
  # then Y_1, Y_2, Y_3 in that order.
  N <- 100000

  df <- data.frame(Z = sample.int(3L, size = N, replace = TRUE))
  df[paste0("Y_", 1:3)] <- replicate(3, rnorm(N), simplify = FALSE)
  71.  
  72.  
  # Time the row-by-row approach: split df into one-row data frames and pull
  # the Y_<Z> column out of each.
  ind_split <-
    system.time({
      # seq_len() is safe for a zero-row df, and vapply() always yields a
      # numeric vector instead of whatever shape sapply() simplifies to.
      df$Y <- vapply(
        split(df, seq_len(nrow(df))),
        function(x) x[[paste0("Y_", x$Z)]],
        numeric(1)
      )
      head(df)
    })
  78.  
  # Add a Y column to a data frame whose rows are treated as sharing one Z.
  #
  # list_element: a data frame with a Z column and the matching Y_<Z>
  #               column; Z is read from the first row only.
  # Returns list_element with Y set to a copy of its Y_<Z> column.
  revealer <- function(list_element) {
    group_z <- list_element[1, "Z"]
    list_element$Y <- list_element[, paste0("Y_", group_z)]
    list_element
  }
  85.  
  # Time the group-wise approach: one revealer() call per distinct value of
  # Z, then stack the pieces back into a single data frame. The stacked
  # result is ordered by group, not by the original row order.
  group_split <- system.time({
    split_list <- split(df, f = df$Z)
    df <- do.call(rbind, args = lapply(X = split_list, FUN = revealer))
    head(df)
  })
  92.  
  93.  
  # Time the explicit approach: one masked replacement per value of Z,
  # evaluated inside the data frame with within().
  by_hand <- system.time({
    # an annoying solution
    df <- within(df, {
      Y <- rep(NA, nrow(df))
      Y <- replace(Y, Z == 1, Y_1[Z == 1])
      Y <- replace(Y, Z == 2, Y_2[Z == 2])
      Y <- replace(Y, Z == 3, Y_3[Z == 3])
    })
    head(df)
  })


  # Show the three timings side by side.
  ind_split
  group_split
  by_hand
  110.  
  111. > ind_split
  112. user system elapsed
  113. 1.023 0.083 1.136
  114. > group_split
  115. user system elapsed
  116. 0.011 0.002 0.013
  117. > by_hand
  118. user system elapsed
  119. 0.001 0.000 0.001
  120.  
  # Find each row's Y_<Z> column by name, then extract with a (row, column)
  # index matrix. seq_len() keeps the index empty for a zero-row df, where
  # 1:nrow(df) would count down as c(1, 0).
  df$Y <- df[cbind(seq_len(nrow(df)), match(paste0("Y_", df$Z), names(df)))]
Add Comment
Please, Sign In to add comment