Guest User

uniquifiers_benchmark.py

a guest
Dec 27th, 2013
161
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from random import shuffle, randint
  2. import re
  3. from sets import Set
  4.  
  5. def f1(seq): # Raymond Hettinger
  6.     # not order preserving
  7.     set = {}
  8.     map(set.__setitem__, seq, [])
  9.     return set.keys()
  10.  
  11.    
  12. def f2(seq):   # *********
  13.     # order preserving
  14.     checked = []
  15.     for e in seq:
  16.         if e not in checked:
  17.             checked.append(e)
  18.     return checked
  19.  
  20. def f3(seq):
  21.     # Not order preserving
  22.     keys = {}
  23.     for e in seq:
  24.         keys[e] = 1
  25.     return keys.keys()
  26.  
  27. def f4(seq): # ********** order preserving
  28.     noDupes = []
  29.     [noDupes.append(i) for i in seq if not noDupes.count(i)]
  30.     return noDupes
  31.  
  32. def f5(seq, idfun=None): # Alex Martelli ******* order preserving
  33.     if idfun is None:
  34.         def idfun(x): return x
  35.     seen = {}
  36.     result = []
  37.     for item in seq:
  38.         marker = idfun(item)
  39.         # in old Python versions:
  40.         # if seen.has_key(marker)
  41.         # but in new ones:
  42.         if marker in seen: continue
  43.         seen[marker] = 1
  44.         result.append(item)
  45.     return result
  46.  
  47.  
  48. def f5b(seq, idfun=None): # Alex Martelli ******* order preserving
  49.     if idfun is None:
  50.         def idfun(x): return x
  51.     seen = {}
  52.     result = []
  53.     for item in seq:
  54.         marker = idfun(item)
  55.         # in old Python versions:
  56.         # if seen.has_key(marker)
  57.         # but in new ones:
  58.         if marker not in seen:
  59.             seen[marker] = 1
  60.             result.append(item)
  61.            
  62.     return result
  63.  
  64.  
  65.  
  66. def f6(seq):
  67.     # Not order preserving
  68.     return list(Set(seq))
  69.  
  70. def f7(seq):
  71.     # Not order preserving
  72.     return list(set(seq))
  73.  
  74. def f8_original(seq): # Dave Kirby
  75.     # Order preserving
  76.     seen = set()
  77.     return [x for x in seq if x not in seen and not seen.add(x)]
  78.  
  79. def uniquify(seq):
  80.     seen = set()
  81.     seen_add = seen.add
  82.     return [x for x in seq if x not in seen and not seen_add(x)]
  83.  
  84. def terhorst(items):
  85.     found = set([])
  86.     keep = []
  87.  
  88.     for item in items:
  89.         if item not in found:
  90.             found.add(item)
  91.             keep.append(item)
  92.  
  93.     return keep
  94.  
  95. def terhorst_localref(items):
  96.     found = set()
  97.     found_add = found.add
  98.     keep = []
  99.     keep_append = keep.append
  100.  
  101.     for item in items:
  102.         if item not in found:
  103.             found_add(item)
  104.             keep_append(item)
  105.     return keep
  106.  
  107. def del_dups(seq):
  108.     seen = {}
  109.     pos = 0
  110.     for item in seq:
  111.         if item not in seen:
  112.             seen[item] = True
  113.             seq[pos] = item
  114.             pos += 1
  115.     del seq[pos:]
  116.     return seq
  117.  
  118.  
  119. def f9(seq):
  120.     # Not order preserving
  121.     return {}.fromkeys(seq).keys()
  122.  
  123. def f10(seq, idfun=None): # Andrew Dalke
  124.     # Order preserving
  125.     return list(_f10(seq, idfun))
  126.  
  127. def _f10(seq, idfun=None):
  128.     seen = set()
  129.     if idfun is None:
  130.         for x in seq:
  131.             if x in seen:
  132.                 continue
  133.             seen.add(x)
  134.             yield x
  135.     else:
  136.         for x in seq:
  137.             x = idfun(x)
  138.             if x in seen:
  139.                 continue
  140.             seen.add(x)
  141.             yield x
  142.            
  143.            
  144. def f11(seq): # f10 but simpler
  145.     # Order preserving
  146.     return list(_f10(seq))
  147.  
  148. def _f11(seq):
  149.     seen = set()
  150.     for x in seq:
  151.         if x in seen:
  152.             continue
  153.         seen.add(x)
  154.         yield x
  155.            
  156. import time
  157.  
  158. def timing(f, n, a):
  159.     print f.__name__,
  160.     r = range(n)
  161.     t1 = time.clock()
  162.     for i in r:
  163.         f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a)
  164.     t2 = time.clock()
  165.     print round(t2-t1, 3)
  166.     return f(a)
  167.  
  168.  
  169.  
  170. def getRandomString(length=10, loweronly=1, numbersonly=0,
  171.                     lettersonly=0):
  172.     """ return a very random string """
  173.     _letters = 'abcdefghijklmnopqrstuvwxyz'
  174.     if numbersonly:
  175.         l = list('0123456789')
  176.     elif lettersonly:
  177.         l = list(_letters + _letters.upper())
  178.     else:
  179.         lowercase = _letters+'0123456789'*2
  180.         l = list(lowercase + lowercase.upper())
  181.     shuffle(l)
  182.     s = ''.join(l)
  183.     if len(s) < length:
  184.         s = s + getRandomString(loweronly=1)
  185.     s = s[:length]
  186.     if loweronly:
  187.         return s.lower()
  188.     else:
  189.         return s
  190.  
  191. testdata = {}
  192. for i in range(35):
  193.     k = getRandomString(5, lettersonly=1)
  194.     v = getRandomString(100 )
  195.     testdata[k] = v
  196.    
  197. testdata = [int(x) for x in list('21354612')]
  198. testdata += list('abcceeaa5efm')
  199. class X:
  200.     def __init__(self, n):
  201.         self.foo = n
  202.     def __repr__(self):
  203.         return "<foo %r>"%self.foo
  204.     def __cmp__(self, e):
  205.         return cmp(self.foo, e.foo)
  206.        
  207. testdata = []
  208. for i in range(10000):
  209.     testdata.append(getRandomString(3, loweronly=True))
  210. #testdata = ['f','g','c','d','b','a','a']
  211.  
  212.  
  213. #order_preserving = f2, f4, f5, f5b, f8, f10, f11
  214. order_preserving = f8_original, uniquify, terhorst, terhorst_localref, del_dups
  215.  
  216. not_order_preserving = f1, f3, f6, f7, f9
  217. testfuncs = order_preserving# + not_order_preserving
  218.  
  219. ref = f8_original(testdata)
  220. for f in testfuncs:
  221.     if f in order_preserving:
  222.         print "*",
  223.     res = timing(f, 100, testdata)
  224.     assert ref == res, "%s %s" % (ref, res)
RAW Paste Data