Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import heapq

import pandas as pd
# Sentinel that distinguishes "no initializer supplied" from a legitimate
# ``None`` (or any other value) passed by the caller.
_missing = object()


def sorting_reduce(function, iterable, key=lambda x: x, initializer=_missing):
    """Stable sorting reduce.

    Repeatedly removes the two smallest remaining items (as ranked by *key*),
    combines them with ``function(left, right)`` and puts the result back
    into the pool, until a single item is left.

    Ordering guarantees:

    * Items with equal keys are taken in the order they entered the pool.
    * Of the two items being combined, the one that entered the pool
      earlier is always passed as ``left``.
    * Every intermediate result enters the pool after all items that are
      already in it.

    Args:
        function: Two-argument callable that combines two items into one.
        iterable: The items to reduce.
        key: One-argument callable returning the sortable "size" of an item.
            It is also applied to every intermediate result.  Defaults to
            the identity function.
        initializer: Optional extra item.  When given, it takes part in the
            reduction like any other item and is ordered ahead of every
            item of *iterable* when keys tie.

    Returns:
        The single item left after all combinations.  If only one item was
        supplied in total, it is returned unchanged and *function* is never
        called.

    Raises:
        AssertionError: If *iterable* is empty and no *initializer* is given.
    """
    iterable = iter(iterable)
    if initializer is _missing:
        initializer = next(iterable, _missing)
        if initializer is _missing:
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped under ``python -O``; otherwise the private
            # sentinel would leak out as the result.  The exception type and
            # message match what the assert statement produced.
            raise AssertionError("At least one item or initializer expected")

    # Heap entries are (size, sequence number, item).  Sequence numbers are
    # unique, so tuple comparison never falls through to the items
    # themselves, and equal sizes are resolved in insertion order.
    heap = [
        (key(initializer), -1, initializer),
        *((key(x), seq, x) for seq, x in enumerate(iterable)),
    ]
    heapq.heapify(heap)

    # The initial entries use -1 .. len(heap) - 2, so this is the first
    # unused sequence number.
    next_seq = len(heap) - 1
    while len(heap) >= 2:
        _, seq_left, left = heapq.heappop(heap)
        _, seq_right, right = heapq.heappop(heap)
        # The heap yields the smaller item first; restore insertion order so
        # that ``function`` always sees the earlier item on the left.
        if seq_left > seq_right:
            left, right = right, left
        new = function(left, right)
        heapq.heappush(heap, (key(new), next_seq, new))
        next_seq += 1

    return heap[0][2]
def merge_all(frames, *args, **kwargs):
    """Merge several DataFrames into one, combining the smallest first.

    The frames are reduced pairwise with ``pd.merge``; at every step the two
    frames with the fewest rows (``len(df.index)``) are merged, and the
    result goes back into the pool with its own row count.

    Args:
        frames: Iterable of frames to merge.  It is materialised into a list
            first, so one-shot iterators and generators are accepted.
        *args: Extra positional arguments forwarded to every ``pd.merge``
            call.
        **kwargs: Extra keyword arguments forwarded to every ``pd.merge``
            call.

    Returns:
        The merged frame.  If exactly one frame is supplied it is returned
        as-is without calling ``pd.merge``.

    Raises:
        AssertionError: If *frames* is empty.
    """
    # Materialise before checking: a generator is always truthy, so testing
    # the raw argument cannot detect an empty one.
    frames = list(frames)
    if not frames:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; type and message are unchanged.
        raise AssertionError("At least one frame expected")
    return sorting_reduce(
        lambda left, right: pd.merge(left, right, *args, **kwargs),
        frames,
        key=lambda df: len(df.index),
    )
# Usage example.
# NOTE(review): df1, df2 and df3 are not defined in this snippet; presumably
# they are DataFrames created by the caller beforehand -- confirm.
merged_df = merge_all(
    [df1, df2, df3],
    how='outer',
    left_index=True,
    right_index=True,
    copy=False,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement