Advertisement
Guest User

Untitled

a guest
Aug 19th, 2016
836
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.66 KB | None | 0 0
  1. # http://stackoverflow.com/questions/39033565/how-to-remove-duplicates-of-huge-lists-of-objects-in-python/39034026#39034026
  2.  
  3. import time, random
  4.  
  5. def timing(f, n, a):
  6.     print(f.__name__)
  7.     r = range(n)
  8.     t1 = time.clock()
  9.     for i in r:
  10.         f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a)
  11.     t2 = time.clock()
  12.     print (round(t2-t1, 3))
  13.  
  14. def getRandomString(length=10, loweronly=1, numbersonly=0,
  15.                     lettersonly=0):
  16.     """ return a very random string """
  17.     _letters = 'abcdefghijklmnopqrstuvwxyz'
  18.     if numbersonly:
  19.         l = list('0123456789')
  20.     elif lettersonly:
  21.         l = list(_letters + _letters.upper())
  22.     else:
  23.         lowercase = _letters+'0123456789'*2
  24.         l = list(lowercase + lowercase.upper())
  25.     random.shuffle(l)
  26.     s = ''.join(l)
  27.     if len(s) < length:
  28.         s = s + getRandomString(loweronly=1)
  29.     s = s[:length]
  30.     if loweronly:
  31.         return s.lower()
  32.     else:
  33.         return s
  34.  
  35. ## create random data
  36. def get_random_data(no_of_dicts=2000, len_of_dicts=4, dupes=100):
  37.     l = []
  38.     for i in range(no_of_dicts):
  39.         testdata = {}
  40.         for i in range(len_of_dicts):
  41.             testdata[getRandomString(4, loweronly=True)] = getRandomString(6, loweronly=True)
  42.         l.append(testdata)
  43.    
  44.     # add dupes at the end
  45.     for i in range(dupes):
  46.             l.append(l[i])
  47.     random.shuffle(l)
  48.     return l
  49.    
  50.  
  51. def remove_dupes(mylist):
  52.     newlist = [mylist[0]]
  53.     for e in mylist:
  54.         if e not in newlist:
  55.             newlist.append(e)
  56.     return newlist
  57.        
  58. l = get_random_data(no_of_dicts=2000, len_of_dicts=6, dupes=100)
  59. nl = remove_dupes(l) # do remove
  60.  
  61. timing(remove_dupes, 10, l)
  62.  
  63. print(len(l))
  64. print(len(nl))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement