Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Demonstration: why a left join can return more rows than its left table.
#
# The left table has one row per ID. The right table draws its IDs from the
# left table but deliberately repeats some of them, so every repeated
# right-hand row adds one extra row to the join result. Steps 1-4 below
# account for those extra rows.

library(data.table)
library(plyr)  # not referenced below; attached before dplyr so dplyr wins
library(dplyr)
library(magrittr)

# Fix the RNG state so the run-dependent counts in steps 3 and 4 can be
# reproduced from one run to the next.
set.seed(1L)

# Problem dimensions, named once instead of being repeated as literals.
n_left <- 1821350L  # rows (and unique IDs) in the left table
n_right <- 1289L    # rows in the right table
n_dup <- 130L       # right-hand rows that reuse an already drawn ID
n_col <- 23L        # random payload columns in each table

tbl_left <- data.table(
  ID = seq_len(n_left),
  matrix(rnorm(n_col * n_left), nrow = n_left)
)
tbl_right <- data.table(matrix(rnorm(n_col * n_right), nrow = n_right))

# Draw n_right - n_dup distinct IDs from the left table, then append n_dup
# more IDs resampled from that first draw: every appended row therefore
# repeats an ID that is already present in the right table.
ID_right <- sample(seq_len(n_left), n_right - n_dup)
ID_right <- c(ID_right, sample(ID_right, n_dup, replace = TRUE))
tbl_right <- tbl_right %>% mutate(ID = ID_right)

# Left IDs are unique, so each left row is emitted once per matching right
# row (or once if unmatched): the result has n_left + n_dup rows.
tbl_joined <- dplyr::left_join(tbl_left, tbl_right, by = "ID")
nrow(tbl_joined)  # 1821480

## 1. Distinct right-hand IDs minus right-hand rows: always -n_dup.
tbl_right %>%
  select(ID) %>%
  distinct() %>%
  nrow() %>%
  subtract(n_right)  # -130

## 2. Right-hand rows whose ID already occurred earlier in the table.
right_ID <- tbl_right %>% select(ID)
right_ID_dup <- right_ID %>% filter(duplicated(ID))
right_ID_dup_sort <- right_ID_dup %>% arrange(ID)
nrow(right_ID_dup_sort)  # 130

## 3. Distinct repeated IDs that also exist in the left table. semi_join()
## keeps each left ID that has a match, in left-table order, without
## stacking both ID columns and searching the stack for duplicates.
comm_ID <- tbl_left %>%
  select(ID) %>%
  distinct() %>%
  semi_join(right_ID_dup, by = "ID")
nrow(comm_ID)  # depends on the random draw; 127 in the original author's run

## 4. Right-hand rows carrying one of those IDs: nrow(comm_ID) + n_dup.
tbl_right_ID <- table(tbl_right$ID)
# table() names are character, so match() locates each ID by its label.
sum(tbl_right_ID[match(comm_ID$ID, names(tbl_right_ID))])  # 257 in that run
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement