• API
• FAQ
• Tools
• Archive
SHARE
TWEET Union/Intersection/Diff ploffie  Nov 20th, 2012 54 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. import heapq
2.
3. def inter1(a, b):
4.     """Loop over a and b and check if there are doubles"""
5.     return [ai for ai in a for bi in b if ai == bi]
6.
7. def union1(a, b):
8.     """Loop over the elements of a+b"""
9.     res = []
10.     for x in a + b:
11.         if x not in res:
12.             res.append(x)
13.     return res
14.
15. def inter6(a, b):
16.     """Make set of b and loop over a and check if ai in b"""
17.     b = set(b)
18.     return [ai for ai in a if ai in b]
19.
20. def inter2(a, b):
21.     """Loop over a and check if present in b"""
22.     return [ai for ai in a if ai in b]
23.
24. def union2(a, b):
25.     """Output a + Loop over b and check if present in a"""
26.     return a + [bi for bi in b if bi not in a]
27.
28. def union6(a, b):
29.     """Make set of a. Output a + loop over b and check if present in a"""
30.     sa = set(a)
31.     return a + [bi for bi in b if bi not in sa]
32.
33. def inter3(a, b):
34.     """Output a after removing all elements in b"""
35.     res = list(a)
36.     for ai in a:
37.         if ai not in b: res.remove(ai)
38.     return res
39.
40. def inter4(a, b):
41.     """Make heaps of a and b. Copy to output elements that are in a and b"""
42.     ha = list(a)
43.     hb = list(b)
44.     res = []
45.     heapq.heapify(ha)
46.     heapq.heapify(hb)
47.     while ha and hb:
48.         if ha < hb:
49.             heapq.heappop(ha)
50.         elif ha > hb:
51.             heapq.heappop(hb)
52.         else:
53.             res.append(heapq.heappop(ha))
54.             heapq.heappop(hb)
55.     return res
56.
57. def union4(a, b):
58.     """Make heaps of a and b. Copy to output elements that are in a or b"""
59.     ha = list(a)
60.     hb = list(b)
61.     res = []
62.     heapq.heapify(ha)
63.     heapq.heapify(hb)
64.     rappend, heappop = res.append, heapq.heappop
65.     while ha and hb:
66.         if ha < hb:
67.             rappend(heappop(ha))
68.         elif ha > hb:
69.             rappend(heappop(hb))
70.         else:
71.             rappend(heappop(ha))
72.             heappop(hb)
73.     return res + ha + hb # either ha or hb can contain a tail
74.
75. def inter5(a, b):
76.     """sort a+b and copy only duplicates by scanning"""
77.     it = iter(sorted(a+b))
78.     prev = next(it)
79.     res = []
80.     rapp = res.append
81.     for e in it:
82.         if e == prev:
83.             rapp(e)
84.         prev = e
85.     return res
86.
87. def union5(a, b):
88.     """sort a+b and copy (duplicates only once) by scanning"""
89.     it = iter(sorted(a+b))
90.     prev = next(it)
91.     res = [prev]
92.     rapp = res.append
93.     for e in it:
94.         if e != prev:
95.             rapp(e)
96.         prev = e
97.     return res
98.
99. def inter99(a, b):
100.     """Make set of a and b. Output list of intersection."""
101.     return list(set(a) & set(b))
102.
103. def union99(a, b):
104.     """Make set of a and b. Output list of union."""
105.     return list(set(a) | set(b))
106.
107. def union99a(a, b):
108.     """Output list of set of a + b."""
109.     return list(set(a + b))
110.
111. def diff1(a, b):
112.     """Loop over a and check if ai not in b"""
113.     return [ai for ai in a if ai not in b]
114.
115. def diff4(a, b):
116.     """Make heaps of a and b. Copy to output elements that are i na but not in b"""
117.     ha = list(a)
118.     hb = list(b)
119.     res = []
120.     heapq.heapify(ha)
121.     heapq.heapify(hb)
122.     while ha and hb:
123.         if ha < hb:
124.             res.append(heapq.heappop(ha))
125.         elif ha > hb:
126.             heapq.heappop(hb)
127.         else:
128.             heapq.heappop(ha)
129.             heapq.heappop(hb)
130.     return res + ha # ha can contain a tail
131.
132. def diff6(a, b):
133.     """Make set of b and loop over a and check that ai not in set(b)"""
134.     setb = set(b)
135.     return [ai for ai in a if ai not in setb]
136.
137. def diff99(a, b):
138.     """Make sets of a and b and subtract"""
139.     return list(set(a) - set(b))
140.
141. if __name__ == '__main__':
142.     from time import clock
143.     from random import randint
144.     from collections import defaultdict
145.     import math
146.
147.     def ranarr(n, N):
148.         """choose n values from 0..N-1 - no doubles"""
149.         data = range(N)
150.         res = []
151.         for i in range(n):
152.             res.append(data.pop(randint(0, N - i - 1)))
153.         return res
154.
155.     def ranarr2(n, N, nov):
156.         data = range(N)
157.         a, b = [], []
158.         for i in range(nov):
159.             x = data.pop(randint(0, N - i - 1))
160.             a.append(x)
161.             b.append(x)
162.         for i in range(n - nov):
163.             a.append(data.pop(randint(0, N - (i + nov) - 1)))
164.         for i in range(n - nov):
165.             a.append(data.pop(randint(0, N - (i + nov + (n - nov)) - 1)))
166.         return a, b
167.
168.     timings = defaultdict(list)
169.     def tim(method, a, b, n=100):
170.         name = method.__name__
171.         t0 = clock()
172.         for i in xrange(n):
173.             method(a, b)
174.         ti = clock() - t0
175.         timings[name].append((len(a), ti))
176.
177.     Ns = [39, 40, 41, 159, 160, 161, 640, 1280, 1281, 5119, 5120, 5121]
178.     a_arr, b_arr = {}, {}
179.     for N in Ns:
180.         a_arr[N], b_arr[N] = ranarr2(N, 3 * N, N // 3)
181.
182.
183.     intersections = [inter1, inter2, inter3, inter4, inter5, inter6, inter99]
184.     unions = [union1, union2, union4, union5, union6, union99, union99a]
185.     totime = [diff1, diff4, diff6, diff99]
186.     for method in totime:
187.         for N in Ns:
188.             a = a_arr[N]
189.             b = b_arr[N]
190.             tim(method, a, b, 1)
191.
192.     def fit_timings(model, t):
193.         """models have form ti = a * f(ni) - linear without intercept"""
194.         a = sum(ti * model(ni) for ni,ti in t) / sum(model(ni) ** 2 for ni,ti in t)
195.         err2 = sum((a * model(ni) - ti) ** 2 for ni, ti in t)
196.         return a, err2
197.
198.     def fit_timings1(model, t):
199.         """models have form ti = a * f(ni) - linear without intercept"""
200.         n = len(t)
201.         tav = sum(ti for ni, ti in t) / n
202.         mav = sum(model(ni) for ni, ti in t) / n
203.         a = sum((ti-tav) * model(ni) for ni,ti in t) / sum((model(ni)-mav) * model(ni) for ni,ti in t)
204.         b = tav - a * mav
205.         err2 = sum((a * model(ni) + b - ti) ** 2 for ni, ti in t)
206.         #print b
207.         return a, err2
208.
209.     def check_models(t):
210.         res = {}
211.         besterr = float("inf")
212.         mod_n = lambda n: n
213.         mod_n2 = lambda n: n ** 2
214.         mod_n3 = lambda n: n ** 3
215.         mod_nlogn = lambda n: n * math.log(n)
216.         for model, mname in [(mod_n, "O(n)") , (mod_n2, "O(n2)"),
217.             (mod_n3, "O(n3)"), (mod_nlogn, "O(nlogn)")]:
218.             coeff, err2 = fit_timings(model, t)
219.             res[mname] = err2
220.             if err2 < besterr:
221.                 bestmethod = mname
222.                 besterr = err2
223.         serr, altmethod = min((v, key) for key, v in res.iteritems() if key != bestmethod)
224.         ratio = serr / besterr
225.         return bestmethod, coeff, ratio, altmethod, res
226.
227.     for method in totime:
228.         name = method.__name__
229.         doc = method.__doc__
230.         times = timings[name]
231.         bestmethod, coeff, ratio, altmethod, fiterr = check_models(times)
232.         print "{0:8s} {1:8s} coeff: {2:6.2e}  ratio:{3:6.0f}   alt: {4:8s} "\
233.             "{5:s}".format(name, bestmethod, coeff, ratio, altmethod, doc)
234.
235. """
236. inter1   O(n2)    coeff: 8.78e-05  ratio:   704   alt: O(nlogn) Loop over a and b and check if there are doubles
237. inter2   O(n2)    coeff: 3.12e-05  ratio:  1958   alt: O(nlogn) Loop over a and check if present in b
238. inter3   O(n2)    coeff: 3.47e-05  ratio:   510   alt: O(nlogn) Output a after removing all elements in b
239. inter4   O(nlogn) coeff: 1.03e-05  ratio:    34   alt: O(n)     Make heaps of a and b. Copy to output elements that are in a and b
240. inter99  O(n)     coeff: 2.71e-07  ratio:     5   alt: O(nlogn) Make set of a and b. Output list of intersection.
241. union1   O(n2)    coeff: 5.83e-05  ratio:  7656   alt: O(nlogn) Loop over the elements of a+b
242. union2   O(n2)    coeff: 3.06e-05  ratio:  1225   alt: O(nlogn) Output a + Loop over b and check if present in a
243. union4   O(nlogn) coeff: 1.09e-05  ratio:     3   alt: O(n)     Make heaps of a and b. Copy to output elements that are in a or b
244. union99  O(n)     coeff: 2.94e-07  ratio:     4   alt: O(nlogn) Make set of a and b. Output list of union.
245. union99a O(nlogn) coeff: 2.30e-07  ratio:     1   alt: O(n)     Output list of set of a + b.
246. diff1    O(n2)    coeff: 4.01e-06  ratio:    27   alt: O(n3)    Loop over a and check if ai not in b
247. diff4    O(nlogn) coeff: 6.06e-07  ratio:     5   alt: O(n)     Make heaps of a and b. Copy to output elements that are i na but not in b
248. diff6    O(n)     coeff: 1.38e-08  ratio:     4   alt: O(nlogn) Make set of b and loop over a and check that ai not in set(b)
249. diff99   O(nlogn) coeff: 2.07e-08  ratio:     2   alt: O(n)     Make sets of a and b and subtract
250. """
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top