Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from binascii import unhexlify
- import collections
- from functools import partial
- from itertools import cycle, islice, imap
- import multiprocessing
- import numpy as np
- from timeit import timeit
- from Crypto.Cipher import AES
# 128-bit AES key and CBC initialization vector, decoded from hex to bytes.
key, iv = (unhexlify(h) for h in
           ('6bce1cb8d64153f82570751b6653c943',
            'b15a65475a91774a45106fbc28f0df70'))
# Each plaintext gets a freshly-initialized AES state (presumably the
# intent, since CBC mode carries chaining state between encrypt calls),
# so this wraps cipher construction and encryption together.
def aesify(plaintext):
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.encrypt(plaintext)
# The 10 sample hex ciphertext strings, repeated forever as a lazy
# round-robin iterator.
_HEX_SAMPLES = '''f493befb2dcad5118d523a4a4bf4a504
54fc4e0a82ae8dc56cc7befc9994b79d
878d287647b457fd95d40691b6e0c8ab
dc0adc16665eb96a15d3257752ae67dc
8cda3b8f23d38e9240b9a89587f69970
e06301763146c1bac24619e61015f481
c19def2f12e5707d89539e18ad104937
048d734a1a36d4346edc7ceda07ff171
5e621ce0a570478c1c2ec3e557ca3e0d
e55c57b119ff922b7f87db0ead2006cd'''.splitlines()
hextexts = cycle(_HEX_SAMPLES)
# Lazily decode each hex string to its 16-byte binary form. Every
# islice() call below takes the next 1000000 binary strings from this
# infinite stream.
texts = imap(unhexlify, hextexts)
# Simplest approach: build the result list with an explicit Python-level
# pass over the inputs. It's slow, and peaks around 50MB because the
# whole result is held in memory.
def loop1m():
    return [aesify(text) for text in islice(texts, 1000000)]
# Materialize the million inputs as a list, then let map() drive the
# loop. Built-in iteration skips the (high) cost of a Python for loop --
# basically the numpy benefit, but not quite as good -- at the cost of
# about 72MB peak memory.
def map1m():
    batch = list(islice(texts, 1000000))
    return map(aesify, batch)
# Purely lazy pipeline: one iterator is transformed into another without
# ever building anything in memory. A bit slower, but the footprint is
# essentially zero (a few KB at most).
def imap1m():
    return imap(aesify, islice(texts, 1000000))
# Vectorize the loop with numpy. That's a little faster than map(),
# though the cost of building the array partly cancels it out; the real
# benefit over map here is space, about 16MB.
def numpy1m():
    arr = np.fromiter(islice(texts, 1000000), dtype='|S16')
    return np.vectorize(aesify)(arr)
# Same numpy approach, but with the input array prebuilt at module level
# so the timing excludes the array-construction cost.
ga = np.fromiter(islice(texts, 1000000), dtype='|S16')


def numpypre1m():
    return np.vectorize(aesify)(ga)
# Simplest multiprocessing version: like the map implementation, but
# fanned out across all cores, which makes it almost 4x as fast. Memory
# use may be slightly higher, though it's hard to measure at this scale
# because every worker process carries some baseline overhead.
pool = multiprocessing.Pool()


def pool1m():
    return pool.map(aesify, islice(texts, 1000000))
# Slightly smarter pool variant: lazy like the imap implementation (very
# little memory use), and it hands each worker a bigger chunk of work
# than the default.
def ipool1m():
    return pool.imap(aesify, islice(texts, 1000000), chunksize=8192)
# Timing helper. Transforming one lazy iterator into another takes ~0
# time, because no work happens until something consumes the values --
# so the benchmark must drain each result. The fastest way to exhaust an
# iterator (per tests posted on python-ideas, for CPython 2.7 and 3.2)
# is feeding it into a deque with maxlen=0.
def discard(it):
    collections.deque(maxlen=0).extend(it)
# Benchmark each strategy exactly once, draining whatever it returns so
# the lazy variants actually perform their work inside the timed call.
for func in (loop1m, map1m, imap1m, numpy1m, numpypre1m, pool1m, ipool1m):
    elapsed = timeit(lambda: discard(func()), number=1)
    print('{:10s}: {}'.format(func.__name__, elapsed))
- # Results on a 64-bit 4-core MacBook Pro with Apple Python 2.7.2:
- # loop1m : 8.07510995865
- # map1m : 7.20256185532
- # imap1m : 7.79751801491
- # numpy1m : 7.33888506889
- # numpypre1m: 6.62892603874
- # pool1m : 2.94084405899
- # ipool1m : 2.06212615967
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement