Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- N <- 100
- df <- data.frame(
- Z = sample(1:3, N, replace = TRUE),
- Y_1 = rnorm(N),
- Y_2 = rnorm(N),
- Y_3 = rnorm(N)
- )
- # an annoying solution
- df <- within(df,{
- Y <- rep(NA, nrow(df))
- Y[Z == 1] <- Y_1[Z == 1]
- Y[Z == 2] <- Y_2[Z == 2]
- Y[Z == 3] <- Y_3[Z == 3]
- })
- head(df)
- Z Y_1 Y_2 Y_3 Y
- 1 3 0.89124772 1.4377700 0.05226285 0.05226285
- 2 1 0.89186873 -0.6984839 -0.86141525 0.89186873
- 3 1 -0.01315678 1.5193461 0.18290065 -0.01315678
- 4 3 -0.57857274 -1.4445197 2.03764943 2.03764943
- 5 3 -0.19793692 -0.1818225 1.10270877 1.10270877
- 6 2 1.48291431 2.7264541 0.70129357 2.72645413
- df$Y <- apply(df, 1, function(x) x[x[1]+1] )
- head(df)
- # Z Y_1 Y_2 Y_3 Y
- #1 1 -0.8598997 -0.3180947 1.9374462 -0.8598997
- #2 2 -0.2392902 0.2266245 0.2364991 0.2266245
- #3 1 -0.8733609 -1.3892361 0.3351359 -0.8733609
- #4 3 -0.6533548 -1.1042993 -0.2906852 -0.2906852
- #5 1 -1.7424126 -0.2101860 0.1198945 -1.7424126
- #6 2 -1.9746651 -0.4308746 -0.7849773 -0.4308746
- get_result <- function(dfrow){
- x <- unlist(dfrow[,1:4])
- Y <- x[x[1] + 1]
- }
- library(purrr)
- newdf <- by_row(df, get_result)
- df$Y <- df[-1][cbind(1:nrow(df), df$Z)]
- df
- # Z Y_1 Y_2 Y_3 Y
- #1 3 0.89124772 1.4377700 0.05226285 0.05226285
- #2 1 0.89186873 -0.6984839 -0.86141525 0.89186873
- #3 1 -0.01315678 1.5193461 0.18290065 -0.01315678
- #4 3 -0.57857274 -1.4445197 2.03764943 2.03764943
- #5 3 -0.19793692 -0.1818225 1.10270877 1.10270877
- #6 2 1.48291431 2.7264541 0.70129357 2.72645410
- df <- structure(list(Z = c(3L, 1L, 1L, 3L, 3L, 2L), Y_1 = c(0.89124772,
- 0.89186873, -0.01315678, -0.57857274, -0.19793692, 1.48291431
- ), Y_2 = c(1.43777, -0.6984839, 1.5193461, -1.4445197, -0.1818225,
- 2.7264541), Y_3 = c(0.05226285, -0.86141525, 0.18290065, 2.03764943,
- 1.10270877, 0.70129357)), .Names = c("Z", "Y_1", "Y_2", "Y_3"
- ), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")
- N <- 100000
- df <- data.frame(
- Z = sample(1:3, N, replace = TRUE),
- Y_1 = rnorm(N),
- Y_2 = rnorm(N),
- Y_3 = rnorm(N)
- )
- ind_split <-
- system.time({
- df$Y <- sapply(split(df, 1:nrow(df)), function(x) x[, paste0("Y_", x$Z)])
- head(df)
- })
- revealer <-
- function(list_element){
- col_name <- paste0("Y_", list_element[1, "Z"])
- list_element$Y <- list_element[,col_name]
- return(list_element)
- }
- group_split <-
- system.time({
- split_list <- split(df, df$Z)
- df <- do.call(what = rbind, lapply(split_list, revealer))
- head(df)
- })
- by_hand <-
- system.time({
- # an annoying solution
- df <- within(df,{
- Y <- rep(NA, nrow(df))
- Y[Z == 1] <- Y_1[Z == 1]
- Y[Z == 2] <- Y_2[Z == 2]
- Y[Z == 3] <- Y_3[Z == 3]
- })
- head(df)
- })
- ind_split
- group_split
- by_hand
- > ind_split
- user system elapsed
- 1.023 0.083 1.136
- > group_split
- user system elapsed
- 0.011 0.002 0.013
- > by_hand
- user system elapsed
- 0.001 0.000 0.001
- df$Y <- df[cbind(1:nrow(df), match(paste0('Y_', df$Z), names(df)))]
Add Comment
Please, Sign In to add comment