Advertisement
Guest User

Untitled

a guest
Feb 27th, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.31 KB | None | 0 0
  def extract_merged_dataframe(df, num_col_df1, num_col_df2, col_df1, col_df2, joinkey_df1=None, joinkey_df2=None):
      """Extract selected columns of df1 and df2 from their merged dataframe.

      The column positions given relative to each source frame are translated
      into positions in ``df`` according to the layout of the merged result,
      which is inferred from the number of columns in ``df``.

      :param df: merged dataframe.
      :param num_col_df1: (int) number of columns df1 had before merging.
      :param num_col_df2: (int) number of columns df2 had before merging.
      :param col_df1: (int or list of int) column position in df1 to keep (0-based).
      :param col_df2: (int or list of int) column position in df2 to keep (0-based).
      :param joinkey_df1: (list of int) column position (0-based). Currently unused.
      :param joinkey_df2: (list of int) column position (0-based). Currently unused.
      :return: extracted data from df (df1 columns first, then df2 columns).
      :raises ValueError: if the merged layout is not one of the supported formats.
      """
      positions_df1 = _as_column_positions(col_df1)
      positions_df2 = _as_column_positions(col_df2)

      total_cols = num_col_df1 + num_col_df2
      merged_cols = df.shape[1]

      if merged_cols == total_cols:
          # Merging keeps the same old columns: df1 columns, then df2 columns.
          shift_df1 = 0
          shift_df2 = num_col_df1
      elif merged_cols == total_cols + 1:
          # Merging added a column 'key_0' at the head, pushing everything right by one.
          shift_df1 = 1
          shift_df2 = num_col_df1 + 1
      elif merged_cols <= total_cols - 1:
          # Merging deleted (possibly many) duplicated "join-key" columns of df2;
          # the positions of the remaining df2 columns cannot be derived here.
          raise ValueError('Format for merged result is too complicated.')
      else:
          raise ValueError('Undefined format for merged result.')

      # Plain addition (not +=) builds new arrays, so caller data is never modified.
      return df.iloc[:, np.concatenate((positions_df1 + shift_df1, positions_df2 + shift_df2))]


  def _as_column_positions(cols):
      """Return a fresh 1-D array of column positions built from an int or a sequence."""
      positions = np.atleast_1d(np.array(cols))
      if positions.size == 0:
          # np.array([]) defaults to float64, which .iloc rejects as an indexer.
          positions = positions.astype(int)
      return positions
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement