from binascii import unhexlify
import collections
from functools import partial
from itertools import cycle, islice, imap
import multiprocessing
import numpy as np
from timeit import timeit
from Crypto.Cipher import AES

key = unhexlify('6bce1cb8d64153f82570751b6653c943')
iv = unhexlify('b15a65475a91774a45106fbc28f0df70')

# I think you want a newly-initialized AES state for each plaintext,
# so this function wraps that up.
def aesify(plaintext):
    aes = AES.new(key, AES.MODE_CBC, iv)
    return aes.encrypt(plaintext)

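# Quick illustration (an addition, not in the original paste): with a fixed
# key and IV, a fresh cipher per call makes aesify deterministic, while
# reusing one CBC object chains state across calls, so the same block
# encrypts differently the second time.
shared = AES.new(key, AES.MODE_CBC, iv)
block = '\x00' * 16
assert shared.encrypt(block) != shared.encrypt(block)  # CBC state carried over
assert aesify(block) == aesify(block)                  # fresh state each time
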
# This creates a lazy iterator with an infinite number of copies of
# the 10 hex strings from your example.
hextexts = cycle('''f493befb2dcad5118d523a4a4bf4a504
54fc4e0a82ae8dc56cc7befc9994b79d
878d287647b457fd95d40691b6e0c8ab
dc0adc16665eb96a15d3257752ae67dc
8cda3b8f23d38e9240b9a89587f69970
e06301763146c1bac24619e61015f481
c19def2f12e5707d89539e18ad104937
048d734a1a36d4346edc7ceda07ff171
5e621ce0a570478c1c2ec3e557ca3e0d
e55c57b119ff922b7f87db0ead2006cd'''.splitlines())
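
# Illustration (an addition, not in the original paste): cycle() repeats its
# input forever, and islice() takes a finite slice, wrapping around as needed.
assert list(islice(cycle('ab'), 5)) == ['a', 'b', 'a', 'b', 'a']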

# This transforms it into a lazy iterator with infinite binary
# versions of the hex strings. Each of the islice calls below will
# give us a lazy iterator that takes the next 1000000 binary strings
# from this infinite list.
texts = imap(unhexlify, hextexts)

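# Illustration (an addition, not in the original paste): imap is fully lazy --
# no unhexlify call happens until a value is pulled from the iterator.
lazy = imap(unhexlify, iter(['00ff']))
assert next(lazy) == '\x00\xff'
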
# This is the simplest implementation, with a regular for loop. It's
# slow, and it uses about 50MB at peak.
def loop1m():
    texts1m = islice(texts, 1000000)
    result = []
    for text in texts1m:
        result.append(aesify(text))
    return result

# This creates a list in memory, and maps it to another list. Letting
# built-in functions do the looping means you skip the cost of a
# Python for loop, which is pretty high. Basically the same benefit
# you'd get from numpy, but not quite as good. And it uses about 72MB
# at peak.
def map1m():
    texts1m = list(islice(texts, 1000000))
    return map(aesify, texts1m)

# This just transforms the lazy iterator into another one, without
# ever building anything in memory. It's a bit slower, but it uses
# 0MB (actually a few KB, not quite nothing... but close enough).
def imap1m():
    texts1m = islice(texts, 1000000)
    return imap(aesify, texts1m)

# This uses numpy to vectorize the loop. That's a little faster than
# map, but not much--and the cost of building the array in the first
# place partially cancels that out. The real benefit over map here is
# the space, 16MB.
def numpy1m():
    a = np.fromiter(islice(texts, 1000000), dtype='|S16')
    return np.vectorize(aesify)(a)

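# Illustration (an addition, not in the original paste): np.vectorize just
# applies a Python function elementwise; '|S16' is a fixed-width 16-byte
# string dtype, matching one AES block per element.
small = np.fromiter(['a' * 16, 'b' * 16], dtype='|S16')
assert np.vectorize(aesify)(small).shape == (2,)
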
# For comparison, here's the numpy version without the cost of
# building the array included.
ga = np.fromiter(islice(texts, 1000000), dtype='|S16')
def numpypre1m():
    return np.vectorize(aesify)(ga)

# Here's the simplest multiprocessing implementation. It's similar to
# the map implementation, but about 2.5x as fast (see the timings at
# the bottom). The memory use may be a little higher, but not hugely.
# (It's hard to measure with sizes this small, because each new Python
# process uses some memory before it even gets started.)
pool = multiprocessing.Pool()
def pool1m():
    texts1m = islice(texts, 1000000)
    return pool.map(aesify, texts1m)

# Here's a slightly smarter one. It's similar to the imap
# implementation (very little memory use), and it uses bigger chunks
# than the default.
def ipool1m():
    texts1m = islice(texts, 1000000)
    return pool.imap(aesify, texts1m, chunksize=8192)

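# Illustration (an addition, not in the original paste): a larger chunksize
# hands each worker big batches, cutting per-item IPC overhead; results
# still come back in order.
sample = pool.map(aesify, ['a' * 16] * 4, chunksize=2)
assert len(sample) == 4 and sample[0] == aesify('a' * 16)
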
# This is a convenient thing to do for performance testing. If you
# just test how long it takes to transform one lazy iterator into
# another, the answer is always nearly 0, because the actual work
# doesn't happen until someone consumes the values. The fastest way to
# consume an entire iterator (at least in CPython 2.7 and 3.2,
# according to tests someone posted on python-ideas last year) is to
# feed it into a deque with maxlen=0.
def discard(it):
    collections.deque(it, maxlen=0)

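# Illustration (an addition, not in the original paste): the deque drains the
# iterator at C speed while storing nothing; afterwards it is exhausted.
it = iter(range(3))
discard(it)
assert list(it) == []
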
for f in loop1m, map1m, imap1m, numpy1m, numpypre1m, pool1m, ipool1m:
    t = timeit(lambda: discard(f()), number=1)
    print('{:10s}: {}'.format(f.__name__, t))

# Results on a 64-bit 4-core MacBook Pro with Apple Python 2.7.2:
# loop1m    : 8.07510995865
# map1m     : 7.20256185532
# imap1m    : 7.79751801491
# numpy1m   : 7.33888506889
# numpypre1m: 6.62892603874
# pool1m    : 2.94084405899
# ipool1m   : 2.06212615967