Advertisement
Guest User

Untitled

a guest
Jun 17th, 2019
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.61 KB | None | 0 0
  1. value <- seq(1, 100, length.out=20)
  2. df1 <- data.frame(id=as.character(1:20),
  3. value=value,
  4. stringsAsFactors=F)
  5.  
  6. df2 <- data.frame(id=as.character(c(1:5, 21:23)),
  7. v2=NA,
  8. stringsAsFactors=F)
  9.  
  10. df2Needed <- data.frame(id=as.character(c(1:5, 21:23)),
  11. v2=c(value[1:5], NA, NA, NA),
  12. stringsAsFactors=F)
  13.  
  14. require(data.table)
  15. dt1 <- data.table(df1, key="id")
  16. dt2 <- data.table(df2)
  17.  
  18. dt1[dt2$id, value]
  19.  
  20. # id value
  21. # 1: 1 1.000000
  22. # 2: 2 6.210526
  23. # 3: 3 11.421053
  24. # 4: 4 16.631579
  25. # 5: 5 21.842105
  26. # 6: 21 NA
  27. # 7: 22 NA
  28. # 8: 23 NA
  29.  
  30. # you don't need to have `v2` column in df2
  31. merge(df2, df1, by="id", all.x=T, sort=F)
  32.  
  33. # id v2 value
  34. # 1 1 NA 1.000000
  35. # 2 2 NA 6.210526
  36. # 3 3 NA 11.421053
  37. # 4 4 NA 16.631579
  38. # 5 5 NA 21.842105
  39. # 6 21 NA NA
  40. # 7 22 NA NA
  41. # 8 23 NA NA
  42.  
  43. library(sqldf)
  44. sqldf('SELECT df2.id , df1.value
  45. FROM df2
  46. LEFT JOIN df1
  47. ON df2.id = df1.id')
  48.  
  49. id value
  50. 1 1 1.000000
  51. 2 2 6.210526
  52. 3 3 11.421053
  53. 4 4 16.631579
  54. 5 5 21.842105
  55. 6 21 NA
  56. 7 22 NA
  57. 8 23 NA
  58.  
  59. library(microbenchmark)
  60. microbenchmark(ag(),ar.dt(),ar.me(),tl())
  61.  
  62. Unit: microseconds
  63. expr min lq median uq max
  64. 1 ag() 23071.953 23536.1680 24053.8590 26889.023 34256.354
  65. 2 ar.dt() 3123.972 3284.5890 3348.1155 3523.333 7740.335
  66. 3 ar.me() 950.807 1015.2815 1095.1160 1128.112 6330.243
  67. 4 tl() 41.340 45.8915 68.0785 71.112 187.735
  68.  
  69. N <- 1e6
  70. df1 <- data.frame(id=as.character(1:N),
  71. value=seq(1, 100),
  72. stringsAsFactors=F)
  73. n2 <- 1000
  74. df2 <- data.frame(id=sample(df1$id,n2),
  75. v2=NA,
  76. stringsAsFactors=F)
  77.  
  78. Unit: milliseconds
  79. expr min lq median uq max
  80. 1 ag() 5678.0580 5865.3063 6034.9151 6214.3664 8084.6294
  81. 2 ar.dt() 8373.6083 8612.9496 8867.6164 9104.7913 10423.5247
  82. 3 ar.me() 387.4665 451.0071 506.8269 648.3958 1014.3099
  83. 4 tl() 174.0375 186.8335 214.0468 252.9383 667.6246
  84.  
  85. ag <- function(){
  86. require(sqldf)
  87. sqldf('SELECT df2.id , df1.value
  88. FROM df2
  89. LEFT JOIN df1
  90. ON df2.id = df1.id')
  91. }
  92.  
  93.  
  94. ar.dt <- function(){
  95. require(data.table)
  96. dt1 <- data.table(df1, key="id")
  97. dt2 <- data.table(df2)
  98. dt1[dt2$id, value]
  99. }
  100.  
  101. ar.me <- function(){
  102. merge(df2, df1, by="id", all.x=T, sort=F)
  103. }
  104.  
  105. tl <- function(){
  106. df2Needed <- df2
  107. df2Needed$v2 <- df1$value[match(df2$id, df1$id)]
  108. }
  109.  
  110. ar.dtLight <- function(){
  111. dt1[dt2$id, value]
  112. }
  113.  
  114. library(microbenchmark)
  115. microbenchmark(ag(),ar.dt(),ar.me(),tl(),ar.dtLight,times=1)
  116.  
  117. Unit: microseconds
  118. expr min lq median uq max
  119. 1 ag() 7247593.591 7247593.591 7247593.591 7247593.591 7247593.591
  120. 2 ar.dt() 8543556.967 8543556.967 8543556.967 8543556.967 8543556.967
  121. 3 ar.dtLight 1.139 1.139 1.139 1.139 1.139
  122. 4 ar.me() 462235.106 462235.106 462235.106 462235.106 462235.106
  123. 5 tl() 201988.996 201988.996 201988.996 201988.996 201988.996
  124.  
  125. df2Needed <- merge(df2,df1,by="id",all.x=TRUE, sort=FALSE)
  126. df2Needed <- df2Needed[,c("id","value")]
  127. colNames(df2Needed) <- c("id","v2")
  128.  
  129. df2Needed <- df2
  130. df2Needed$v2 <- df1$value[match(df2$id, df1$id)]
  131.  
  132. > library(dplyr)
  133. > library(qdapTools)
  134. > mutate(df2, v2 = id %l% df1)
  135. # id v2
  136. #1 1 1.000000
  137. #2 2 6.210526
  138. #3 3 11.421053
  139. #4 4 16.631579
  140. #5 5 21.842105
  141. #6 21 NA
  142. #7 22 NA
  143. #8 23 NA
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement