import heapq
def inter1(a, b):
"""Loop over a and b and check if there are doubles"""
return [ai for ai in a for bi in b if ai == bi]
def union1(a, b):
"""Loop over the elements of a+b"""
res = []
for x in a + b:
if x not in res:
res.append(x)
return res
def inter6(a, b):
"""Make set of b and loop over a and check if ai in b"""
b = set(b)
return [ai for ai in a if ai in b]
def inter2(a, b):
"""Loop over a and check if present in b"""
return [ai for ai in a if ai in b]
def union2(a, b):
"""Output a + Loop over b and check if present in a"""
return a + [bi for bi in b if bi not in a]
def union6(a, b):
"""Make set of a. Output a + loop over b and check if present in a"""
sa = set(a)
return a + [bi for bi in b if bi not in sa]
def inter3(a, b):
"""Output a after removing all elements in b"""
res = list(a)
for ai in a:
if ai not in b: res.remove(ai)
return res
def inter4(a, b):
"""Make heaps of a and b. Copy to output elements that are in a and b"""
ha = list(a)
hb = list(b)
res = []
heapq.heapify(ha)
heapq.heapify(hb)
while ha and hb:
if ha[0] < hb[0]:
heapq.heappop(ha)
elif ha[0] > hb[0]:
heapq.heappop(hb)
else:
res.append(heapq.heappop(ha))
heapq.heappop(hb)
return res
def union4(a, b):
"""Make heaps of a and b. Copy to output elements that are in a or b"""
ha = list(a)
hb = list(b)
res = []
heapq.heapify(ha)
heapq.heapify(hb)
rappend, heappop = res.append, heapq.heappop
while ha and hb:
if ha[0] < hb[0]:
rappend(heappop(ha))
elif ha[0] > hb[0]:
rappend(heappop(hb))
else:
rappend(heappop(ha))
heappop(hb)
return res + ha + hb # either ha or hb can contain a tail
def inter5(a, b):
"""sort a+b and copy only duplicates by scanning"""
it = iter(sorted(a+b))
prev = next(it)
res = []
rapp = res.append
for e in it:
if e == prev:
rapp(e)
prev = e
return res
def union5(a, b):
"""sort a+b and copy (duplicates only once) by scanning"""
it = iter(sorted(a+b))
prev = next(it)
res = [prev]
rapp = res.append
for e in it:
if e != prev:
rapp(e)
prev = e
return res
def inter99(a, b):
"""Make set of a and b. Output list of intersection."""
return list(set(a) & set(b))
def union99(a, b):
"""Make set of a and b. Output list of union."""
return list(set(a) | set(b))
def union99a(a, b):
"""Output list of set of a + b."""
return list(set(a + b))
def diff1(a, b):
"""Loop over a and check if ai not in b"""
return [ai for ai in a if ai not in b]
def diff4(a, b):
"""Make heaps of a and b. Copy to output elements that are i na but not in b"""
ha = list(a)
hb = list(b)
res = []
heapq.heapify(ha)
heapq.heapify(hb)
while ha and hb:
if ha[0] < hb[0]:
res.append(heapq.heappop(ha))
elif ha[0] > hb[0]:
heapq.heappop(hb)
else:
heapq.heappop(ha)
heapq.heappop(hb)
return res + ha # ha can contain a tail
def diff6(a, b):
"""Make set of b and loop over a and check that ai not in set(b)"""
setb = set(b)
return [ai for ai in a if ai not in setb]
def diff99(a, b):
"""Make sets of a and b and subtract"""
return list(set(a) - set(b))
if __name__ == '__main__':
from time import clock
from random import randint
from collections import defaultdict
import math
def ranarr(n, N):
"""choose n values from 0..N-1 - no doubles"""
data = range(N)
res = []
for i in range(n):
res.append(data.pop(randint(0, N - i - 1)))
return res
def ranarr2(n, N, nov):
data = range(N)
a, b = [], []
for i in range(nov):
x = data.pop(randint(0, N - i - 1))
a.append(x)
b.append(x)
for i in range(n - nov):
a.append(data.pop(randint(0, N - (i + nov) - 1)))
for i in range(n - nov):
a.append(data.pop(randint(0, N - (i + nov + (n - nov)) - 1)))
return a, b
timings = defaultdict(list)
def tim(method, a, b, n=100):
name = method.__name__
t0 = clock()
for i in xrange(n):
method(a, b)
ti = clock() - t0
timings[name].append((len(a), ti))
Ns = [39, 40, 41, 159, 160, 161, 640, 1280, 1281, 5119, 5120, 5121]
a_arr, b_arr = {}, {}
for N in Ns:
a_arr[N], b_arr[N] = ranarr2(N, 3 * N, N // 3)
intersections = [inter1, inter2, inter3, inter4, inter5, inter6, inter99]
unions = [union1, union2, union4, union5, union6, union99, union99a]
totime = [diff1, diff4, diff6, diff99]
for method in totime:
for N in Ns:
a = a_arr[N]
b = b_arr[N]
tim(method, a, b, 1)
def fit_timings(model, t):
"""models have form ti = a * f(ni) - linear without intercept"""
a = sum(ti * model(ni) for ni,ti in t) / sum(model(ni) ** 2 for ni,ti in t)
err2 = sum((a * model(ni) - ti) ** 2 for ni, ti in t)
return a, err2
def fit_timings1(model, t):
"""models have form ti = a * f(ni) - linear without intercept"""
n = len(t)
tav = sum(ti for ni, ti in t) / n
mav = sum(model(ni) for ni, ti in t) / n
a = sum((ti-tav) * model(ni) for ni,ti in t) / sum((model(ni)-mav) * model(ni) for ni,ti in t)
b = tav - a * mav
err2 = sum((a * model(ni) + b - ti) ** 2 for ni, ti in t)
#print b
return a, err2
def check_models(t):
res = {}
besterr = float("inf")
mod_n = lambda n: n
mod_n2 = lambda n: n ** 2
mod_n3 = lambda n: n ** 3
mod_nlogn = lambda n: n * math.log(n)
for model, mname in [(mod_n, "O(n)") , (mod_n2, "O(n2)"),
(mod_n3, "O(n3)"), (mod_nlogn, "O(nlogn)")]:
coeff, err2 = fit_timings(model, t)
res[mname] = err2
if err2 < besterr:
bestmethod = mname
besterr = err2
serr, altmethod = min((v, key) for key, v in res.iteritems() if key != bestmethod)
ratio = serr / besterr
return bestmethod, coeff, ratio, altmethod, res
for method in totime:
name = method.__name__
doc = method.__doc__
times = timings[name]
bestmethod, coeff, ratio, altmethod, fiterr = check_models(times)
print "{0:8s} {1:8s} coeff: {2:6.2e} ratio:{3:6.0f} alt: {4:8s} "\
"{5:s}".format(name, bestmethod, coeff, ratio, altmethod, doc)
"""
inter1 O(n2) coeff: 8.78e-05 ratio: 704 alt: O(nlogn) Loop over a and b and check if there are doubles
inter2 O(n2) coeff: 3.12e-05 ratio: 1958 alt: O(nlogn) Loop over a and check if present in b
inter3 O(n2) coeff: 3.47e-05 ratio: 510 alt: O(nlogn) Output a after removing all elements in b
inter4 O(nlogn) coeff: 1.03e-05 ratio: 34 alt: O(n) Make heaps of a and b. Copy to output elements that are in a and b
inter99 O(n) coeff: 2.71e-07 ratio: 5 alt: O(nlogn) Make set of a and b. Output list of intersection.
union1 O(n2) coeff: 5.83e-05 ratio: 7656 alt: O(nlogn) Loop over the elements of a+b
union2 O(n2) coeff: 3.06e-05 ratio: 1225 alt: O(nlogn) Output a + Loop over b and check if present in a
union4 O(nlogn) coeff: 1.09e-05 ratio: 3 alt: O(n) Make heaps of a and b. Copy to output elements that are in a or b
union99 O(n) coeff: 2.94e-07 ratio: 4 alt: O(nlogn) Make set of a and b. Output list of union.
union99a O(nlogn) coeff: 2.30e-07 ratio: 1 alt: O(n) Output list of set of a + b.
diff1 O(n2) coeff: 4.01e-06 ratio: 27 alt: O(n3) Loop over a and check if ai not in b
diff4 O(nlogn) coeff: 6.06e-07 ratio: 5 alt: O(n) Make heaps of a and b. Copy to output elements that are i na but not in b
diff6 O(n) coeff: 1.38e-08 ratio: 4 alt: O(nlogn) Make set of b and loop over a and check that ai not in set(b)
diff99 O(nlogn) coeff: 2.07e-08 ratio: 2 alt: O(n) Make sets of a and b and subtract
"""