Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Make example data ----
# NOTE: column C is drawn with sample() and no seed is set, so its values
# differ from run to run; only A and B are used as join keys below.
df_a <- data.frame(
  A = c(1:9, 11),
  B = letters[1:10],
  C = sample(1:4, 10, replace = TRUE)
)
df_b <- data.frame(
  A = c(1:10, 1:10),
  B = letters[c(1:5, 10, 9, 8, 7, 5, 6:15)],
  C = sample(1:4, 20, replace = TRUE)
)
# Join keys, most important first. Names are the left-hand column names and
# values the right-hand column names (the form dplyr's `by` accepts).
order_of_importance <- c("A" = "A", "B" = "B")
# Define recursive join function ----

#' Join two data frames, retrying unmatched rows on fewer key columns
#'
#' Rows of `left_df` are inner-joined to `right_df` on every column listed in
#' `variable_order`. Rows of `left_df` with no match are retried with the last
#' key dropped, and so on until only the first key remains. Each matched row
#' records how many keys it was matched on in `cols_matched`.
#'
#' @param left_df Data frame whose rows are matched against `right_df`.
#' @param right_df Data frame to look matches up in.
#' @param variable_order Non-empty character vector of join keys, most
#'   important first; it is passed to dplyr as `by`. Names are column names in
#'   `left_df`, values are column names in `right_df`. Unnamed entries are
#'   treated as having the same name on both sides.
#'   NOTE(review): the column bookkeeping below looks like it assumes each
#'   key has the same name in both data frames -- confirm before using it
#'   with differing names.
#' @return The row-bind (via `dplyr::bind_rows()`) of:
#'   * the rows matched at each level, with `cols_matched` set to the number
#'     of keys used at that level; right-hand non-key columns that clash with
#'     a left-hand column get a `.y` suffix;
#'   * when the last remaining key also fails, the unmatched rows with
#'     `cols_matched` set to 0.
#'   Whenever a key is dropped for the still-unmatched rows, their left-hand
#'   value for that key is kept in a column named `<key>.x`.
recursive_join <- function(left_df, right_df, variable_order) {
  stopifnot(is.character(variable_order), length(variable_order) > 0)

  # Make sure every key carries a name. With an unnamed `by` vector the
  # `.x` rename below would silently do nothing, and the NA fill in the base
  # case would then overwrite the left-hand key values of unmatched rows.
  key_names <- names(variable_order)
  if (is.null(key_names)) {
    key_names <- rep("", length(variable_order))
  }
  needs_name <- is.na(key_names) | key_names == ""
  key_names[needs_name] <- variable_order[needs_name]
  names(variable_order) <- key_names

  # Keys for the next (less strict) level, and the key dropped to get there.
  remaining_keys <- head(variable_order, -1)
  last_variable <- tail(variable_order, 1)
  left_key <- names(last_variable)

  # Rows that match on all keys of this level.
  intermediate <- dplyr::inner_join(
    left_df,
    right_df,
    by = variable_order,
    suffix = c("", ".y")
  )
  # `$<-` with a scalar errors on a zero-row data.frame, hence the guard.
  if (nrow(intermediate) != 0) {
    intermediate$cols_matched <- length(variable_order)
  }

  # Rows that do not match on all keys of this level.
  new_left_df <- dplyr::anti_join(left_df, right_df, by = variable_order)
  # Park the left-hand value of the dropped key under `<key>.x` so that it
  # does not clash with the right-hand column of that name at later levels.
  names(new_left_df)[names(new_left_df) == left_key] <- paste0(left_key, ".x")

  # Base case: no keys left to fall back on.
  if (length(remaining_keys) == 0) {
    if (nrow(new_left_df) != 0) {
      # No right-hand row was found, so the right-hand key value is missing.
      new_left_df[[unname(last_variable)]] <- NA
      new_left_df$cols_matched <- 0
    }
    return(dplyr::bind_rows(intermediate, new_left_df))
  }

  # Everything matched at this level: nothing to retry.
  if (nrow(new_left_df) == 0) {
    return(intermediate)
  }

  # Retry the unmatched rows on the remaining, more important keys.
  dplyr::bind_rows(
    intermediate,
    recursive_join(new_left_df, right_df, remaining_keys)
  )
}
# Run the recursive join on the example data; the result is kept in
# `joined_output` for inspection.
joined_output <- recursive_join(
  left_df = df_a,
  right_df = df_b,
  variable_order = order_of_importance
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement