Advertisement
Guest User

Untitled

a guest
Sep 20th, 2017
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.74 KB | None | 0 0
  # Make example data: two data frames sharing the columns A, B and C.
  # Column C is drawn at random, so its values differ between runs.

  df_a <- data.frame(
    A = c(1:9, 11),
    B = letters[1:10],
    C = sample(1:4, 10, replace = TRUE)
  )
  df_b <- data.frame(
    A = c(1:10, 1:10),
    B = letters[c(1:5, 10, 9, 8, 7, 5, 6:15)],
    C = sample(1:4, 20, replace = TRUE)
  )

  # Join keys, most important first (the last entry is the first to be
  # dropped by recursive_join). This vector is passed as the `by` argument of
  # the dplyr joins.
  order_of_importance <- c("A" = "A", "B" = "B")
  7.  
  8. #Define Recursive Join Function
  9.  
  #' Join two data frames on progressively fewer key columns
  #'
  #' Rows of `left_df` are first matched against `right_df` on every key in
  #' `variable_order`. Rows without a match are retried with the last key
  #' dropped, and so on until only the first key is left.
  #'
  #' @param left_df Data frame whose rows are to be matched.
  #' @param right_df Data frame to match against.
  #' @param variable_order Non-empty character vector of join keys, most
  #'   important first, in the form accepted by the `by` argument of dplyr
  #'   joins. Unnamed entries are taken to have the same name in both inputs.
  #' @return A data frame binding the matched rows of every level. Matched
  #'   rows carry `cols_matched`, the number of keys they matched on. When a
  #'   key is dropped, the left-hand value of that key is kept in a column
  #'   named `<key>.x`. Rows that match on no key are kept with
  #'   `cols_matched` set to 0 and `NA` in the first key column.
  recursive_join <- function(left_df, right_df, variable_order) {
    stopifnot(is.character(variable_order), length(variable_order) > 0)

    # Give every key an explicit left-hand name. An unnamed vector such as
    # c("A", "B") has NULL names, which would make the rename below a silent
    # no-op and let the base case overwrite the left key with NA.
    key_names <- names(variable_order)
    if (is.null(key_names)) {
      key_names <- rep("", length(variable_order))
    }
    unnamed <- is.na(key_names) | key_names == ""
    key_names[unnamed] <- variable_order[unnamed]
    names(variable_order) <- key_names

    # Keys for the next level, and the key dropped to get there.
    remaining_keys <- head(variable_order, -1)
    last_key <- tail(variable_order, 1)

    # Rows that match on all current keys; clashing non-key columns coming
    # from right_df get the suffix ".y".
    matched <- dplyr::inner_join(
      left_df,
      right_df,
      by = variable_order,
      suffix = c("", ".y")
    )
    if (nrow(matched) != 0) {
      # Guarded because a plain zero-row data.frame rejects a length-one column.
      matched$cols_matched <- length(variable_order)
    }

    # Rows that fail to match on the current keys.
    unmatched <- dplyr::anti_join(left_df, right_df, by = variable_order)
    # Rename the key about to be dropped so that the next join, which no
    # longer uses it, does not mix it up with right_df's column of that name.
    dropped <- names(unmatched) == names(last_key)
    names(unmatched)[dropped] <- paste0(names(last_key), ".x")

    if (length(remaining_keys) == 0) {
      # No keys left to drop: keep the leftovers, flagged as matching nothing.
      if (nrow(unmatched) != 0) {
        unmatched[[unname(last_key)]] <- NA
        unmatched$cols_matched <- 0
      }
      return(dplyr::bind_rows(matched, unmatched))
    }
    if (nrow(unmatched) == 0) {
      return(matched)
    }

    # Retry the leftovers with one key fewer.
    dplyr::bind_rows(
      matched,
      recursive_join(unmatched, right_df, remaining_keys)
    )
  }
  44.  
  # Run the recursive join on the example data; the result is stored in
  # joined_output for inspection.

  joined_output <- recursive_join(
    left_df = df_a,
    right_df = df_b,
    variable_order = order_of_importance
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement