Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import itertools
- def multi_merge(*iterables, key=None, reverse=False):
- """
- Non-recursive mergesort-style algorithm:
- Maintain a "tree of losers" of comparisons
- as winners advance to the root.
- (c.f. Knuth Volume 3, Chapter 5.4.1. on "Multiway Merging").
- Essentially use a heap, but instead of pushing new items to the
- root and sifting down, push them directly to the leaf nodes--items
- only ever move toward the root.
- Example for n=5 iterables A--E:
- 0
- 1 2
- 3 4 5 6
- 7 8 | | |
- | | C D E
- A B
- (shift = -3)
- Always:
- n leaf nodes
- - get their values directly from iterators
- n-1 internal nodes
- - get their values from their children
- """
- n = len(iterables)
- if n == 0:
- return
- if n == 1:
- yield from iterables[0]
- return
- tree = [object()] * (n + n - 1)
- sentinel = object()
- if key is None:
- def value(x): return x
- if reverse:
- def compare(x, y): return y < x
- else:
- def compare(x, y): return x < y
- else:
- def value(x): return x[1]
- iterables = [zip(map(key, it), it) for it in iterables]
- if reverse:
- def compare(x, y): return y[0] < x[0]
- else:
- def compare(x, y): return x[0] < y[0]
- # For stability, rotate the list so that the first iterator
- # moves from the smallest index to the leftmost index.
- shift = n - (1 << n.bit_length())
- getters = [itertools.chain(iterables[i + shift],
- itertools.repeat(sentinel)
- ).__next__
- for i in range(len(iterables))]
- # initialize leaves
- for i, getter in enumerate(getters):
- tree[i + n - 1] = getter()
- # general case with keys
- for i in reversed(range(n - 1)):
- while i < n - 1:
- left = 2 * i + 1
- right = left + 1
- t_left = tree[left]
- t_right = tree[right]
- if t_right is sentinel:
- winner = left
- elif t_left is sentinel:
- winner = right
- elif compare(t_right, t_left):
- winner = right
- else:
- winner = left
- tree[i] = tree[winner]
- i = winner
- tree[i] = getters[i - (n - 1)]()
- # Main loop:
- # - yield the overall winner
- # - replace parents with their better children
- # - replace leaves using original iterables
- while True:
- t0 = tree[0]
- if t0 is sentinel:
- return
- yield value(t0)
- i = 0
- while i < n - 1:
- left = 2 * i + 1
- right = left + 1
- t_left = tree[left]
- t_right = tree[right]
- if t_right is sentinel:
- winner = left
- elif t_left is sentinel:
- winner = right
- elif compare(t_right, t_left):
- winner = right
- else:
- winner = left
- tree[i] = tree[winner]
- i = winner
- tree[i] = getters[i - (n - 1)]()
- if __name__ == "__main__":
- from timeit import timeit
- import random
- from test import support
- py_heapq = support.import_fresh_module('heapq', blocked=['_heapq'])
- print("number of iterables, (time to pure-python multi_merge / pure-python heapq merge)")
- n = 0
- while True:
- n = max(int(1.2*n), n+1)
- t1 = t2 = 0
- for _ in range(3):
- # lists = [
- # range(i, 100, 10)
- # for i in range(5)
- # ]
- lists = [
- sorted(random.random() for _ in
- range(random.randint(200, 1000)))
- for _ in range(n)
- ]
- l1 = list(multi_merge(*lists, key=abs))
- l2 = list(py_heapq.merge(*lists, key=abs))
- assert l1 == l2, l1
- t1 += timeit(lambda: list(multi_merge(*lists)), number=50)
- t2 += timeit(lambda: list(py_heapq.merge(*lists)), number=50)
- print(n, t1/t2, sep='\t')
Add Comment
Please, Sign In to add comment