Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.30 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. _missing = object()
  4.  
  5. def sorting_reduce(function, iterable, key=lambda x: x, initializer=_missing):
  6. """Stable sorting reduce.
  7.  
  8. Cumulatively applies the function to the two smallest elements of the set.
  9. Preserves order of elements of same size.
  10. """
  11.  
  12. import heapq
  13.  
  14. iterable = iter(iterable)
  15.  
  16. if initializer is _missing:
  17. initializer = next(iterable, _missing)
  18. assert initializer is not _missing, "At least one item or initializer expected"
  19.  
  20. items = [
  21. (key(initializer), -1, initializer),
  22. *((key(x), seq, x) for seq, x in enumerate(iterable)),
  23. ]
  24.  
  25. heapq.heapify(items)
  26.  
  27. seq = len(items) - 1
  28.  
  29. while len(items) >= 2:
  30. _, seq_left, left = heapq.heappop(items)
  31. _, seq_right, right = heapq.heappop(items)
  32.  
  33. if seq_left > seq_right:
  34. left, right = right, left
  35.  
  36. new = function(left, right)
  37. heapq.heappush(items, (key(new), seq, new))
  38. seq += 1
  39.  
  40. _, _, result = heapq.heappop(items)
  41.  
  42. return result
  43.  
  44. def merge_all(frames, *args, **kwargs):
  45. assert frames, "At least one frame expected"
  46.  
  47. return sorting_reduce(
  48. lambda left, right: pd.merge(left, right, *args, **kwargs),
  49. frames,
  50. key=lambda df: len(df.index),
  51. )
  52.  
  53. # usage example
  54. merged_df = merge_all([df1, df2, df3], how='outer', left_index=True, right_index=True, copy=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement