Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from binascii import unhexlify
- import collections
- from functools import partial
- from itertools import cycle, islice, imap
- import multiprocessing
- import numpy as np
- from timeit import timeit
- from Crypto.Cipher import AES
# 128-bit AES key and CBC initialization vector, decoded from hex to bytes.
key, iv = (unhexlify(h) for h in
           ('6bce1cb8d64153f82570751b6653c943',
            'b15a65475a91774a45106fbc28f0df70'))
# Each plaintext gets a freshly-initialized AES state (presumably the
# intent, since CBC mode carries chaining state between encrypt calls),
# so this wraps cipher construction and encryption together.
def aesify(plaintext):
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.encrypt(plaintext)
# The 10 sample hex ciphertext strings, repeated forever as a lazy
# round-robin iterator.
_HEX_SAMPLES = '''f493befb2dcad5118d523a4a4bf4a504
54fc4e0a82ae8dc56cc7befc9994b79d
878d287647b457fd95d40691b6e0c8ab
dc0adc16665eb96a15d3257752ae67dc
8cda3b8f23d38e9240b9a89587f69970
e06301763146c1bac24619e61015f481
c19def2f12e5707d89539e18ad104937
048d734a1a36d4346edc7ceda07ff171
5e621ce0a570478c1c2ec3e557ca3e0d
e55c57b119ff922b7f87db0ead2006cd'''.splitlines()
hextexts = cycle(_HEX_SAMPLES)
# Lazily decode each hex string to its 16-byte binary form. Every
# islice() call below takes the next 1000000 binary strings from this
# infinite stream.
texts = imap(unhexlify, hextexts)
# Simplest approach: build the result list with an explicit Python-level
# pass over the inputs. It's slow, and peaks around 50MB because the
# whole result is held in memory.
def loop1m():
    return [aesify(text) for text in islice(texts, 1000000)]
# Materialize the million inputs as a list, then let map() drive the
# loop. Built-in iteration skips the (high) cost of a Python for loop --
# basically the numpy benefit, but not quite as good -- at the cost of
# about 72MB peak memory.
def map1m():
    batch = list(islice(texts, 1000000))
    return map(aesify, batch)
# Purely lazy pipeline: one iterator is transformed into another without
# ever building anything in memory. A bit slower, but the footprint is
# essentially zero (a few KB at most).
def imap1m():
    return imap(aesify, islice(texts, 1000000))
# Vectorize the loop with numpy. That's a little faster than map(),
# though the cost of building the array partly cancels it out; the real
# benefit over map here is space, about 16MB.
def numpy1m():
    arr = np.fromiter(islice(texts, 1000000), dtype='|S16')
    return np.vectorize(aesify)(arr)
# Same numpy approach, but with the input array prebuilt at module level
# so the timing excludes the array-construction cost.
ga = np.fromiter(islice(texts, 1000000), dtype='|S16')


def numpypre1m():
    return np.vectorize(aesify)(ga)
# Simplest multiprocessing version: like the map implementation, but
# fanned out across all cores, which makes it almost 4x as fast. Memory
# use may be slightly higher, though it's hard to measure at this scale
# because every worker process carries some baseline overhead.
pool = multiprocessing.Pool()


def pool1m():
    return pool.map(aesify, islice(texts, 1000000))
# Slightly smarter pool variant: lazy like the imap implementation (very
# little memory use), and it hands each worker a bigger chunk of work
# than the default.
def ipool1m():
    return pool.imap(aesify, islice(texts, 1000000), chunksize=8192)
# Timing helper. Transforming one lazy iterator into another takes ~0
# time, because no work happens until something consumes the values --
# so the benchmark must drain each result. The fastest way to exhaust an
# iterator (per tests posted on python-ideas, for CPython 2.7 and 3.2)
# is feeding it into a deque with maxlen=0.
def discard(it):
    collections.deque(maxlen=0).extend(it)
# Benchmark each strategy exactly once, draining whatever it returns so
# the lazy variants actually perform their work inside the timed call.
for func in (loop1m, map1m, imap1m, numpy1m, numpypre1m, pool1m, ipool1m):
    elapsed = timeit(lambda: discard(func()), number=1)
    print('{:10s}: {}'.format(func.__name__, elapsed))
- # Results on a 64-bit 4-core MacBook Pro with Apple Python 2.7.2:
- # loop1m : 8.07510995865
- # map1m : 7.20256185532
- # imap1m : 7.79751801491
- # numpy1m : 7.33888506889
- # numpypre1m: 6.62892603874
- # pool1m : 2.94084405899
- # ipool1m : 2.06212615967
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement