Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Attach dplyr with library() so a missing package stops the script at once
- # (require() would only warn and return FALSE).
- library(dplyr)
- # Fix the RNG seed so both sample() calls below are reproducible.
- set.seed(1)
- # 120 rows: ids id1..id40, each repeated 3 times. age is NA for the first
- # 60 rows and a shuffled 1..20 (each value repeated 3 times) for the rest.
- large_df <- tibble(
-   id = rep(paste0("id", 1:40), each = 3),
-   age = c(rep(NA, 60), rep(sample(20), each = 3)),
-   col3 = rep(letters[1:20], 6),
-   col4 = rep(1:60, 2)
- )
- # Lookup table: one age (a second shuffle of 1..20) for each of id1..id20.
- small_df <- tibble(
-   id = paste0("id", 1:20),
-   age = sample(20)
- )
- # Copy age from small_df into every large_df row whose id occurs in small_df.
- # Iterating with `for (i in nrow(large_df))` would touch only the last row,
- # so look up all rows at once with a vectorised match() instead.
- id_pos <- match(large_df$id, small_df$id)
- # Rows whose id has no counterpart in small_df keep their current age.
- has_match <- !is.na(id_pos)
- large_df$age[has_match] <- small_df$age[id_pos[has_match]]
- # Hard-code the target result: overwrite age in rows 1..60 with each
- # small_df age repeated three times, then print the tibble.
- large_df$age <- replace(large_df$age, seq_len(60), rep(small_df$age, each = 3))
- large_df
- # A tibble: 120 x 4
- id age col3 col4
- <chr> <int> <chr> <int>
- 1 id1 6 a 1
- 2 id1 6 b 2
- 3 id1 6 c 3
- 4 id2 8 d 4
- 5 id2 8 e 5
- 6 id2 8 f 6
- 7 id3 11 g 7
- 8 id3 11 h 8
- 9 id3 11 i 9
- 10 id4 16 j 10
- # ... with 110 more rows
- # Join small_df onto large_df by id; the two age columns come back as
- # age.x (from large_df) and age.y (from small_df). Keep age.x where it is
- # present, fall back to age.y otherwise, then drop both helper columns.
- result <- large_df %>%
-   left_join(small_df, by = 'id') %>%
-   mutate(age = coalesce(age.x, age.y)) %>%
-   dplyr::select(-age.x, -age.y)
- result
- # A tibble: 120 x 4
- id col3 col4 age
- <chr> <chr> <int> <int>
- 1 id1 a 1 19
- 2 id1 b 2 19
- 3 id1 c 3 19
- 4 id2 d 4 5
Add Comment
Please, Sign In to add comment