Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def extract_merged_dataframe(df, num_col_df1, num_col_df2, col_df1, col_df2, joinkey_df1=None, joinkey_df2=None):
    """Extract selected columns from a merged dataframe by position.

    The column layout of ``df`` is inferred from its column count relative to
    ``num_col_df1 + num_col_df2``, and the requested positions are shifted
    accordingly before selecting with ``df.iloc``.

    :param df: merged dataframe.
    :param num_col_df1: (int) number of columns df1 had before merging.
    :param num_col_df2: (int) number of columns df2 had before merging.
    :param col_df1: (list of int) column position in df1 to keep (0-based). May be empty.
    :param col_df2: (list of int) column position in df2 to keep (0-based). May be empty.
    :param joinkey_df1: (list of int) column position (0-based). Accepted but not used here.
    :param joinkey_df2: (list of int) column position (0-based). Accepted but not used here.
    :return: extracted data from df; df1 columns first, then df2 columns.
    :raises ValueError: if the column count of ``df`` matches no supported layout.
    """
    # Force an integer dtype: np.array([]) defaults to float64, which would
    # turn the concatenated selector into floats and make .iloc reject it.
    # np.array copies, so the caller's lists/arrays are never modified.
    pos_df1 = np.array(col_df1, dtype=int)
    pos_df2 = np.array(col_df2, dtype=int)

    total_cols = df.shape[1]
    expected_cols = num_col_df1 + num_col_df2

    if total_cols == expected_cols:
        # Merging keeps the same old columns: df2 columns follow df1 columns.
        pos_df2 = pos_df2 + num_col_df1
    elif total_cols == expected_cols + 1:
        # Merging added one column ('key_0') to the head: everything shifts by 1.
        pos_df1 = pos_df1 + 1
        pos_df2 = pos_df2 + num_col_df1 + 1
    elif total_cols <= expected_cols - 1:
        # Merging deleted (possibly many) duplicated "join-key" columns in df2;
        # mapping df2 positions would need the join keys, which is not implemented.
        raise ValueError('Format for merged result is too complicated.')
    else:
        # More than one extra column: no known layout.
        raise ValueError('Undefined format for merged result.')

    return df.iloc[:, np.concatenate((pos_df1, pos_df2))]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement