SHARE
TWEET

Union/Intersection/Diff

ploffie Nov 20th, 2012 54 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import heapq
  2.  
  3. def inter1(a, b):
  4.     """Loop over a and b and check if there are doubles"""
  5.     return [ai for ai in a for bi in b if ai == bi]
  6.  
  7. def union1(a, b):
  8.     """Loop over the elements of a+b"""
  9.     res = []
  10.     for x in a + b:
  11.         if x not in res:
  12.             res.append(x)
  13.     return res
  14.  
  15. def inter6(a, b):
  16.     """Make set of b and loop over a and check if ai in b"""
  17.     b = set(b)
  18.     return [ai for ai in a if ai in b]
  19.  
  20. def inter2(a, b):
  21.     """Loop over a and check if present in b"""
  22.     return [ai for ai in a if ai in b]
  23.  
  24. def union2(a, b):
  25.     """Output a + Loop over b and check if present in a"""
  26.     return a + [bi for bi in b if bi not in a]
  27.  
  28. def union6(a, b):
  29.     """Make set of a. Output a + loop over b and check if present in a"""
  30.     sa = set(a)
  31.     return a + [bi for bi in b if bi not in sa]
  32.  
  33. def inter3(a, b):
  34.     """Output a after removing all elements in b"""
  35.     res = list(a)
  36.     for ai in a:
  37.         if ai not in b: res.remove(ai)
  38.     return res
  39.  
  40. def inter4(a, b):
  41.     """Make heaps of a and b. Copy to output elements that are in a and b"""
  42.     ha = list(a)
  43.     hb = list(b)
  44.     res = []
  45.     heapq.heapify(ha)
  46.     heapq.heapify(hb)
  47.     while ha and hb:
  48.         if ha[0] < hb[0]:
  49.             heapq.heappop(ha)
  50.         elif ha[0] > hb[0]:
  51.             heapq.heappop(hb)
  52.         else:
  53.             res.append(heapq.heappop(ha))
  54.             heapq.heappop(hb)
  55.     return res
  56.            
  57. def union4(a, b):    
  58.     """Make heaps of a and b. Copy to output elements that are in a or b"""
  59.     ha = list(a)
  60.     hb = list(b)
  61.     res = []
  62.     heapq.heapify(ha)
  63.     heapq.heapify(hb)
  64.     rappend, heappop = res.append, heapq.heappop
  65.     while ha and hb:
  66.         if ha[0] < hb[0]:
  67.             rappend(heappop(ha))
  68.         elif ha[0] > hb[0]:
  69.             rappend(heappop(hb))
  70.         else:
  71.             rappend(heappop(ha))
  72.             heappop(hb)
  73.     return res + ha + hb # either ha or hb can contain a tail
  74.  
  75. def inter5(a, b):
  76.     """sort a+b and copy only duplicates by scanning"""
  77.     it = iter(sorted(a+b))
  78.     prev = next(it)
  79.     res = []
  80.     rapp = res.append
  81.     for e in it:
  82.         if e == prev:
  83.             rapp(e)
  84.         prev = e
  85.     return res
  86.        
  87. def union5(a, b):
  88.     """sort a+b and copy (duplicates only once) by scanning"""
  89.     it = iter(sorted(a+b))
  90.     prev = next(it)
  91.     res = [prev]
  92.     rapp = res.append
  93.     for e in it:
  94.         if e != prev:
  95.             rapp(e)
  96.         prev = e
  97.     return res
  98.        
  99. def inter99(a, b):
  100.     """Make set of a and b. Output list of intersection."""
  101.     return list(set(a) & set(b))
  102.  
  103. def union99(a, b):
  104.     """Make set of a and b. Output list of union."""
  105.     return list(set(a) | set(b))
  106.  
  107. def union99a(a, b):
  108.     """Output list of set of a + b."""
  109.     return list(set(a + b))
  110.  
  111. def diff1(a, b):
  112.     """Loop over a and check if ai not in b"""
  113.     return [ai for ai in a if ai not in b]
  114.    
  115. def diff4(a, b):    
  116.     """Make heaps of a and b. Copy to output elements that are i na but not in b"""
  117.     ha = list(a)
  118.     hb = list(b)
  119.     res = []
  120.     heapq.heapify(ha)
  121.     heapq.heapify(hb)
  122.     while ha and hb:
  123.         if ha[0] < hb[0]:
  124.             res.append(heapq.heappop(ha))
  125.         elif ha[0] > hb[0]:
  126.             heapq.heappop(hb)
  127.         else:
  128.             heapq.heappop(ha)
  129.             heapq.heappop(hb)
  130.     return res + ha # ha can contain a tail
  131.    
  132. def diff6(a, b):
  133.     """Make set of b and loop over a and check that ai not in set(b)"""
  134.     setb = set(b)
  135.     return [ai for ai in a if ai not in setb]
  136.    
  137. def diff99(a, b):
  138.     """Make sets of a and b and subtract"""
  139.     return list(set(a) - set(b))
  140.    
  141. if __name__ == '__main__':
  142.     from time import clock
  143.     from random import randint
  144.     from collections import defaultdict
  145.     import math
  146.    
  147.     def ranarr(n, N):
  148.         """choose n values from 0..N-1 - no doubles"""
  149.         data = range(N)
  150.         res = []
  151.         for i in range(n):
  152.             res.append(data.pop(randint(0, N - i - 1)))
  153.         return res
  154.    
  155.     def ranarr2(n, N, nov):
  156.         data = range(N)
  157.         a, b = [], []
  158.         for i in range(nov):
  159.             x = data.pop(randint(0, N - i - 1))
  160.             a.append(x)
  161.             b.append(x)
  162.         for i in range(n - nov):
  163.             a.append(data.pop(randint(0, N - (i + nov) - 1)))
  164.         for i in range(n - nov):
  165.             a.append(data.pop(randint(0, N - (i + nov + (n - nov)) - 1)))
  166.         return a, b
  167.            
  168.     timings = defaultdict(list)
  169.     def tim(method, a, b, n=100):
  170.         name = method.__name__
  171.         t0 = clock()
  172.         for i in xrange(n):
  173.             method(a, b)
  174.         ti = clock() - t0
  175.         timings[name].append((len(a), ti))
  176.        
  177.     Ns = [39, 40, 41, 159, 160, 161, 640, 1280, 1281, 5119, 5120, 5121]
  178.     a_arr, b_arr = {}, {}  
  179.     for N in Ns:
  180.         a_arr[N], b_arr[N] = ranarr2(N, 3 * N, N // 3)
  181.        
  182.    
  183.     intersections = [inter1, inter2, inter3, inter4, inter5, inter6, inter99]
  184.     unions = [union1, union2, union4, union5, union6, union99, union99a]
  185.     totime = [diff1, diff4, diff6, diff99]
  186.     for method in totime:
  187.         for N in Ns:
  188.             a = a_arr[N]
  189.             b = b_arr[N]
  190.             tim(method, a, b, 1)
  191.  
  192.     def fit_timings(model, t):
  193.         """models have form ti = a * f(ni) - linear without intercept"""
  194.         a = sum(ti * model(ni) for ni,ti in t) / sum(model(ni) ** 2 for ni,ti in t)
  195.         err2 = sum((a * model(ni) - ti) ** 2 for ni, ti in t)
  196.         return a, err2
  197.            
  198.     def fit_timings1(model, t):
  199.         """models have form ti = a * f(ni) - linear without intercept"""
  200.         n = len(t)
  201.         tav = sum(ti for ni, ti in t) / n
  202.         mav = sum(model(ni) for ni, ti in t) / n
  203.         a = sum((ti-tav) * model(ni) for ni,ti in t) / sum((model(ni)-mav) * model(ni) for ni,ti in t)
  204.         b = tav - a * mav
  205.         err2 = sum((a * model(ni) + b - ti) ** 2 for ni, ti in t)
  206.         #print b
  207.         return a, err2
  208.            
  209.     def check_models(t):
  210.         res = {}
  211.         besterr = float("inf")
  212.         mod_n = lambda n: n
  213.         mod_n2 = lambda n: n ** 2
  214.         mod_n3 = lambda n: n ** 3
  215.         mod_nlogn = lambda n: n * math.log(n)
  216.         for model, mname in [(mod_n, "O(n)") , (mod_n2, "O(n2)"),
  217.             (mod_n3, "O(n3)"), (mod_nlogn, "O(nlogn)")]:
  218.             coeff, err2 = fit_timings(model, t)
  219.             res[mname] = err2
  220.             if err2 < besterr:
  221.                 bestmethod = mname
  222.                 besterr = err2
  223.         serr, altmethod = min((v, key) for key, v in res.iteritems() if key != bestmethod)
  224.         ratio = serr / besterr
  225.         return bestmethod, coeff, ratio, altmethod, res
  226.                    
  227.     for method in totime:
  228.         name = method.__name__
  229.         doc = method.__doc__
  230.         times = timings[name]
  231.         bestmethod, coeff, ratio, altmethod, fiterr = check_models(times)
  232.         print "{0:8s} {1:8s} coeff: {2:6.2e}  ratio:{3:6.0f}   alt: {4:8s} "\
  233.             "{5:s}".format(name, bestmethod, coeff, ratio, altmethod, doc)
  234.  
  235. """
  236. inter1   O(n2)    coeff: 8.78e-05  ratio:   704   alt: O(nlogn) Loop over a and b and check if there are doubles
  237. inter2   O(n2)    coeff: 3.12e-05  ratio:  1958   alt: O(nlogn) Loop over a and check if present in b
  238. inter3   O(n2)    coeff: 3.47e-05  ratio:   510   alt: O(nlogn) Output a after removing all elements in b
  239. inter4   O(nlogn) coeff: 1.03e-05  ratio:    34   alt: O(n)     Make heaps of a and b. Copy to output elements that are in a and b
  240. inter99  O(n)     coeff: 2.71e-07  ratio:     5   alt: O(nlogn) Make set of a and b. Output list of intersection.
  241. union1   O(n2)    coeff: 5.83e-05  ratio:  7656   alt: O(nlogn) Loop over the elements of a+b
  242. union2   O(n2)    coeff: 3.06e-05  ratio:  1225   alt: O(nlogn) Output a + Loop over b and check if present in a
  243. union4   O(nlogn) coeff: 1.09e-05  ratio:     3   alt: O(n)     Make heaps of a and b. Copy to output elements that are in a or b
  244. union99  O(n)     coeff: 2.94e-07  ratio:     4   alt: O(nlogn) Make set of a and b. Output list of union.
  245. union99a O(nlogn) coeff: 2.30e-07  ratio:     1   alt: O(n)     Output list of set of a + b.
  246. diff1    O(n2)    coeff: 4.01e-06  ratio:    27   alt: O(n3)    Loop over a and check if ai not in b
  247. diff4    O(nlogn) coeff: 6.06e-07  ratio:     5   alt: O(n)     Make heaps of a and b. Copy to output elements that are i na but not in b
  248. diff6    O(n)     coeff: 1.38e-08  ratio:     4   alt: O(nlogn) Make set of b and loop over a and check that ai not in set(b)
  249. diff99   O(nlogn) coeff: 2.07e-08  ratio:     2   alt: O(n)     Make sets of a and b and subtract
  250. """
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top