Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # http://stackoverflow.com/questions/39033565/how-to-remove-duplicates-of-huge-lists-of-objects-in-python/39034026#39034026
- import time, random
def timing(f, n, a):
    """Print the name of ``f`` and how long ``10 * n`` calls of ``f(a)`` take.

    Args:
        f: Callable taking one argument; its ``__name__`` is printed first.
        n: Number of loop iterations; ``f`` is called ten times per iteration.
        a: The argument passed to every call of ``f``.

    Returns:
        None. The elapsed time in seconds, rounded to three decimals, is
        printed to standard output.
    """
    print(f.__name__)
    r = range(n)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring short durations.
    t1 = time.perf_counter()
    for i in r:
        # Ten calls per iteration, presumably to dilute the loop overhead.
        f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a); f(a)
    t2 = time.perf_counter()
    print(round(t2 - t1, 3))
def getRandomString(length=10, loweronly=1, numbersonly=0,
                    lettersonly=0):
    """Return a random string of ``length`` characters.

    The string is built from shuffled copies of a character pool, so it is
    random but not suitable for security-sensitive tokens.

    Args:
        length: Number of characters to return.
        loweronly: If truthy, the result is lower-cased before returning.
        numbersonly: If truthy, only the digits ``0``-``9`` are used.
        lettersonly: If truthy (and ``numbersonly`` is falsy), only ASCII
            letters are used.

    Returns:
        The generated string.
    """
    _letters = 'abcdefghijklmnopqrstuvwxyz'
    if numbersonly:
        pool = list('0123456789')
    elif lettersonly:
        pool = list(_letters + _letters.upper())
    else:
        lowercase = _letters + '0123456789' * 2
        pool = list(lowercase + lowercase.upper())

    random.shuffle(pool)
    chars = list(pool)
    # When more characters are requested than the pool holds, keep appending
    # reshuffled copies of the *same* pool so that the requested length and
    # the numbersonly/lettersonly restriction are both honoured.
    while len(chars) < length:
        random.shuffle(pool)
        chars.extend(pool)

    s = ''.join(chars)[:length]
    if loweronly:
        return s.lower()
    return s
- ## create random data
def get_random_data(no_of_dicts=2000, len_of_dicts=4, dupes=100):
    """Build a shuffled list of random dicts that contains duplicates.

    Args:
        no_of_dicts: Number of freshly generated dicts.
        len_of_dicts: Number of key/value insertions per dict. Keys are
            random 4-character strings and values random 6-character
            strings, so a key collision can leave a dict with fewer entries.
        dupes: Number of duplicate entries appended. Duplicates are
            references to the already generated dicts (not copies), taken
            in order from the start and cycling if ``dupes`` exceeds
            ``no_of_dicts``.

    Returns:
        A shuffled list of ``no_of_dicts + dupes`` dicts, or an empty list
        when ``no_of_dicts`` is 0 (there is nothing to duplicate).
    """
    data = []
    for _ in range(no_of_dicts):
        testdata = {}
        for _ in range(len_of_dicts):
            testdata[getRandomString(4, loweronly=True)] = getRandomString(6, loweronly=True)
        data.append(testdata)

    # Add dupes at the end; skipped for an empty list, where indexing
    # would otherwise raise IndexError.
    unique_count = len(data)
    if unique_count:
        for index in range(dupes):
            data.append(data[index % unique_count])

    random.shuffle(data)
    return data
def remove_dupes(mylist):
    """Return a new list with the duplicates of ``mylist`` removed.

    Elements are compared with ``==`` via the ``in`` operator, so unhashable
    items such as dicts are supported. The first occurrence of each element
    is kept and the original order is preserved.

    Args:
        mylist: The items to de-duplicate; may be empty.

    Returns:
        A new list holding each distinct element once.
    """
    # Start empty rather than with mylist[0] so empty input returns []
    # instead of raising IndexError; the first element is added by the loop.
    newlist = []
    for e in mylist:
        if e not in newlist:
            newlist.append(e)
    return newlist
# Benchmark driver: build the sample data, de-duplicate it once, then time
# ten rounds (100 calls) of remove_dupes on the same input.
l = get_random_data(dupes=100, len_of_dicts=6, no_of_dicts=2000)
nl = remove_dupes(l)  # de-duplicated result, kept for the size report below
timing(remove_dupes, 10, l)
# Report the list sizes before and after de-duplication, one per line.
print(len(l), len(nl), sep="\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement