import heapq def inter1(a, b): """Loop over a and b and check if there are doubles""" return [ai for ai in a for bi in b if ai == bi] def union1(a, b): """Loop over the elements of a+b""" res = [] for x in a + b: if x not in res: res.append(x) return res def inter2(a, b): """Loop over a and check if present in b""" return [ai for ai in a if ai in b] def union2(a, b): """Output a + Loop over b and check if present in a""" return a + [bi for bi in b if bi not in a] def inter3(a, b): """Output a after removing all elements in b""" res = list(a) for ai in a: if ai not in b: res.remove(ai) return res def inter4(a, b): """Make heaps of a and b. Copy to output elements that are in a and b""" ha = list(a) hb = list(b) res = [] heapq.heapify(ha) heapq.heapify(hb) while ha and hb: if ha[0] < hb[0]: heapq.heappop(ha) elif ha[0] > hb[0]: heapq.heappop(hb) else: res.append(heapq.heappop(ha)) heapq.heappop(hb) return res def union4(a, b): """Make heaps of a and b. Copy to output elements that are in a or b""" ha = list(a) hb = list(b) res = [] heapq.heapify(ha) heapq.heapify(hb) while ha and hb: if ha[0] < hb[0]: res.append(heapq.heappop(ha)) elif ha[0] > hb[0]: res.append(heapq.heappop(hb)) else: res.append(heapq.heappop(ha)) heapq.heappop(hb) return res + ha + hb # either ha or hb can contain a tail def inter99(a, b): """Make set of a and b. Output list of intersection.""" return list(set(a) & set(b)) def union99(a, b): """Make set of a and b. Output list of union.""" return list(set(a) | set(b)) def union99a(a, b): """Output list of set of a + b.""" return list(set(a + b)) if __name__ == '__main__': from time import clock from random import randint from collections import defaultdict import math def ranarr(n, N): data = range(N) res = [] for i in range(n): res.append(data.pop(randint(0, N - i - 1))) return res timings = defaultdict(list) def tim(method, a, b, n=100): name = method.__name__ t0 = clock() for i in xrange(n): res = method(a, b) ti = clock() - t0 timings[name].append((len(a), ti)) Ns = [10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240] a_arr, b_arr = {}, {} for N in Ns: a_arr[N] = ranarr(N, 3 * N) b_arr[N] = ranarr(N, 3 * N) intersections = [inter1, inter2, inter3, inter4, inter99] unions = [union1, union2, union4, union99, union99a] for method in intersections + unions: for N in Ns: a = a_arr[N] b = b_arr[N] tim(method, a, b, 1) def fit_timings(model, t): a = sum(ti * model(ni) for ni,ti in t) / sum(model(ni) ** 2 for ni,ti in t) err2 = sum((a * model(ni) - ti) ** 2 for ni, ti in t) return a, err2 def check_models(t): res = {} besterr = float("inf") mod_n = lambda n: n mod_n2 = lambda n: n ** 2 mod_nlogn = lambda n: n * math.log(n) n2 = lambda n: n ** 2 for model, mname in [(mod_n, "O(n)") , (mod_n2, "O(n2)"), (mod_nlogn, "O(nlogn)")]: coeff, err2 = fit_timings(model, t) res[mname] = err2 if err2 < besterr: bestmethod = mname besterr = err2 serr, altmethod = min((v, key) for key, v in res.iteritems() if key != bestmethod) ratio = serr / besterr return bestmethod, coeff, ratio, altmethod, res for method in intersections + unions: name = method.__name__ doc = method.__doc__ times = timings[name] bestmethod, coeff, ratio, altmethod, fiterr = check_models(times) print "{0:8s} {1:8s} coeff: {2:6.2e} ratio:{3:6.0f} alt: {4:8s} "\ "{5:s}".format(name, bestmethod, coeff, ratio, altmethod, doc) """ inter1 O(n2) coeff: 8.78e-05 ratio: 704 alt: O(nlogn) Loop over a and b and check if there are doubles inter2 O(n2) coeff: 3.12e-05 ratio: 1958 alt: O(nlogn) Loop over a and check if present in b inter3 O(n2) coeff: 3.47e-05 ratio: 510 alt: O(nlogn) Output a after removing all elements in b inter4 O(nlogn) coeff: 1.03e-05 ratio: 34 alt: O(n) Make heaps of a and b. Copy to output elements that are in a and b inter99 O(n) coeff: 2.71e-07 ratio: 5 alt: O(nlogn) Make set of a and b. Output list of intersection. union1 O(n2) coeff: 5.83e-05 ratio: 7656 alt: O(nlogn) Loop over the elements of a+b union2 O(n2) coeff: 3.06e-05 ratio: 1225 alt: O(nlogn) Output a + Loop over b and check if present in a union4 O(nlogn) coeff: 1.09e-05 ratio: 3 alt: O(n) Make heaps of a and b. Copy to output elements that are in a or b union99 O(n) coeff: 2.94e-07 ratio: 4 alt: O(nlogn) Make set of a and b. Output list of union. union99a O(nlogn) coeff: 2.30e-07 ratio: 1 alt: O(n) Output list of set of a + b. """