Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- value <- seq(1, 100, length.out=20)
- df1 <- data.frame(id=as.character(1:20),
- value=value,
- stringsAsFactors=F)
- df2 <- data.frame(id=as.character(c(1:5, 21:23)),
- v2=NA,
- stringsAsFactors=F)
- df2Needed <- data.frame(id=as.character(c(1:5, 21:23)),
- v2=c(value[1:5], NA, NA, NA),
- stringsAsFactors=F)
- require(data.table)
- dt1 <- data.table(df1, key="id")
- dt2 <- data.table(df2)
- dt1[dt2$id, value]
- # id value
- # 1: 1 1.000000
- # 2: 2 6.210526
- # 3: 3 11.421053
- # 4: 4 16.631579
- # 5: 5 21.842105
- # 6: 21 NA
- # 7: 22 NA
- # 8: 23 NA
- # you don't need to have `v2` column in df2
- merge(df2, df1, by="id", all.x=T, sort=F)
- # id v2 value
- # 1 1 NA 1.000000
- # 2 2 NA 6.210526
- # 3 3 NA 11.421053
- # 4 4 NA 16.631579
- # 5 5 NA 21.842105
- # 6 21 NA NA
- # 7 22 NA NA
- # 8 23 NA NA
- library(sqldf)
- sqldf('SELECT df2.id , df1.value
- FROM df2
- LEFT JOIN df1
- ON df2.id = df1.id')
- id value
- 1 1 1.000000
- 2 2 6.210526
- 3 3 11.421053
- 4 4 16.631579
- 5 5 21.842105
- 6 21 NA
- 7 22 NA
- 8 23 NA
- library(microbenchmark)
- microbenchmark(ag(),ar.dt(),ar.me(),tl())
- Unit: microseconds
- expr min lq median uq max
- 1 ag() 23071.953 23536.1680 24053.8590 26889.023 34256.354
- 2 ar.dt() 3123.972 3284.5890 3348.1155 3523.333 7740.335
- 3 ar.me() 950.807 1015.2815 1095.1160 1128.112 6330.243
- 4 tl() 41.340 45.8915 68.0785 71.112 187.735
- N <- 1e6
- df1 <- data.frame(id=as.character(1:N),
- value=seq(1, 100),
- stringsAsFactors=F)
- n2 <- 1000
- df2 <- data.frame(id=sample(df1$id,n2),
- v2=NA,
- stringsAsFactors=F)
- Unit: milliseconds
- expr min lq median uq max
- 1 ag() 5678.0580 5865.3063 6034.9151 6214.3664 8084.6294
- 2 ar.dt() 8373.6083 8612.9496 8867.6164 9104.7913 10423.5247
- 3 ar.me() 387.4665 451.0071 506.8269 648.3958 1014.3099
- 4 tl() 174.0375 186.8335 214.0468 252.9383 667.6246
- ag <- function(){
- require(sqldf)
- sqldf('SELECT df2.id , df1.value
- FROM df2
- LEFT JOIN df1
- ON df2.id = df1.id')
- }
- ar.dt <- function(){
- require(data.table)
- dt1 <- data.table(df1, key="id")
- dt2 <- data.table(df2)
- dt1[dt2$id, value]
- }
- ar.me <- function(){
- merge(df2, df1, by="id", all.x=T, sort=F)
- }
- tl <- function(){
- df2Needed <- df2
- df2Needed$v2 <- df1$value[match(df2$id, df1$id)]
- }
- ar.dtLight <- function(){
- dt1[dt2$id, value]
- }
- library(microbenchmark)
- microbenchmark(ag(),ar.dt(),ar.me(),tl(),ar.dtLight,times=1)
- Unit: microseconds
- expr min lq median uq max
- 1 ag() 7247593.591 7247593.591 7247593.591 7247593.591 7247593.591
- 2 ar.dt() 8543556.967 8543556.967 8543556.967 8543556.967 8543556.967
- 3 ar.dtLight 1.139 1.139 1.139 1.139 1.139
- 4 ar.me() 462235.106 462235.106 462235.106 462235.106 462235.106
- 5 tl() 201988.996 201988.996 201988.996 201988.996 201988.996
- df2Needed <- merge(df2,df1,by="id",all.x=TRUE, sort=FALSE)
- df2Needed <- df2Needed[,c("id","value")]
- colNames(df2Needed) <- c("id","v2")
- df2Needed <- df2
- df2Needed$v2 <- df1$value[match(df2$id, df1$id)]
- > library(dplyr)
- > library(qdapTools)
- > mutate(df2, v2 = id %l% df1)
- # id v2
- #1 1 1.000000
- #2 2 6.210526
- #3 3 11.421053
- #4 4 16.631579
- #5 5 21.842105
- #6 21 NA
- #7 22 NA
- #8 23 NA
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement