Pastebin is 300% more awesome when you are logged in. Sign Up, it's FREE!
Guest

Untitled

By: a guest on Oct 31st, 2012  |  syntax: C  |  size: 12.14 KB  |  hits: 35  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. // CUDA MD5 hash calculation implementation (A: mjuric@ias.edu).
  2. // A very useful link: http://people.eku.edu/styere/Encrypt/JS-MD5.html
  3.  
  4. #include <stdio.h>
  5.  
  6. typedef unsigned int uint;
  7.  
  8. // On-device variable declarations
  9. extern __shared__ char memory[];        // on-chip shared memory
  10. __constant__ uint k[64], rconst[16];    // constants (in fast on-chip constant cache)
  11. __constant__ uint target[4];            // target hash, if searching for hash matches
  12.  
  13. // MD5 magic numbers. These will be loaded into on-device "constant" memory
  14. static const uint k_cpu[64] = {
  15.         0xd76aa478, 0xe8c7b756, 0x242070db,     0xc1bdceee,
  16.         0xf57c0faf,     0x4787c62a, 0xa8304613, 0xfd469501,
  17.         0x698098d8,     0x8b44f7af,     0xffff5bb1,     0x895cd7be,
  18.         0x6b901122, 0xfd987193, 0xa679438e,     0x49b40821,
  19.  
  20.         0xf61e2562,     0xc040b340, 0x265e5a51, 0xe9b6c7aa,
  21.         0xd62f105d,     0x2441453,      0xd8a1e681,     0xe7d3fbc8,
  22.         0x21e1cde6,     0xc33707d6, 0xf4d50d87, 0x455a14ed,
  23.         0xa9e3e905,     0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
  24.  
  25.         0xfffa3942,     0x8771f681, 0x6d9d6122, 0xfde5380c,
  26.         0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
  27.         0x289b7ec6, 0xeaa127fa, 0xd4ef3085,     0x4881d05,
  28.         0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
  29.  
  30.         0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
  31.         0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
  32.         0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
  33.         0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
  34. };
  35.  
  36. static const uint rconst_cpu[16] = {
  37.         7, 12, 17, 22,   5,  9, 14, 20,   4, 11, 16, 23,   6, 10, 15, 21
  38. };
  39.  
  40. extern "C"
  41. void init_constants(uint *target_cpu)
  42. {
  43.         cudaMemcpyToSymbol(k, k_cpu, sizeof(k));
  44.         cudaMemcpyToSymbol(rconst, rconst_cpu, sizeof(rconst));
  45.         if(target_cpu) { cudaMemcpyToSymbol(target, target_cpu, 4*4); };
  46. }
  47.  
  48. // MD5 routines (straight from Wikipedia's MD5 pseudocode description)
  49. //__device__ inline uint leftrotate (uint x, uint c){
  50. //      return (x << c) | (x >> (32 - c));
  51. //}
  52.  
  53. //__device__ inline uint r(const uint i){
  54. //      return rconst[(i / 16) * 4 + i % 4];
  55. //}
  56.  
  57. // Accessor for w[16] array. Naively, this would just be w[i]; however, this
  58. // choice leads to worst-case-scenario access pattern wrt. shared memory
  59. // bank conflicts, as the same indices in different threads fall into the
  60. // same bank (as the words are 16 uints long). The packing below causes the
  61. // same indices in different threads of a warp to map to different banks. In
  62. // testing this gave a ~40% speedup.
  63. //
  64. // PS: An alternative solution would be to make the w array 17 uints long
  65. // (thus wasting a little shared memory)
  66. //__device__ inline uint &getw(uint *w, const int i){
  67. //      return w[i/* + threadIdx.x) % 16*/];
  68. //}
  69.  
  70. // const- version
  71. __device__ inline uint getw(const uint *w, const int i){
  72.         return w[i/* + threadIdx.x) % 16*/];
  73. }
  74.  
  75.  
  76. //__device__ inline uint getk(const int i){
  77. //      return k[i];    // Note: this is as fast as possible (measured)
  78. //}
  79.  
  80. //////////////////////////////////////////////////////////////////////////////
  81. /////////////       Ron Rivest's MD5 C Implementation       //////////////////
  82. //////////////////////////////////////////////////////////////////////////////
  83.  
  84. /*
  85.  **********************************************************************
  86.  ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. **
  87.  **                                                                  **
  88.  ** License to copy and use this software is granted provided that   **
  89.  ** it is identified as the "RSA Data Security, Inc. MD5 Message     **
  90.  ** Digest Algorithm" in all material mentioning or referencing this **
  91.  ** software or this function.                                       **
  92.  **                                                                  **
  93.  ** License is also granted to make and use derivative works         **
  94.  ** provided that such works are identified as "derived from the RSA **
  95.  ** Data Security, Inc. MD5 Message Digest Algorithm" in all         **
  96.  ** material mentioning or referencing the derived work.             **
  97.  **                                                                  **
  98.  ** RSA Data Security, Inc. makes no representations concerning      **
  99.  ** either the merchantability of this software or the suitability   **
  100.  ** of this software for any particular purpose.  It is provided "as **
  101.  ** is" without express or implied warranty of any kind.             **
  102.  **                                                                  **
  103.  ** These notices must be retained in any copies of any part of this **
  104.  ** documentation and/or software.                                   **
  105.  **********************************************************************
  106.  */
  107.  
  108.  
  109. /* F, G and H are basic MD5 functions: selection, majority, parity */
  110. #define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
  111. #define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
  112. #define H(x, y, z) ((x) ^ (y) ^ (z))
  113. #define I(x, y, z) ((y) ^ ((x) | (~z)))
  114.  
  115. /* ROTATE_LEFT rotates x left n bits */
  116. #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
  117.  
  118. /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */
  119. /* Rotation is separate from addition to prevent recomputation */
  120. #define FF(a, b, c, d, x, s, ac) \
  121.   {(a) += F ((b), (c), (d)) + (x) + (uint)(ac); \
  122.    (a) = ROTATE_LEFT ((a), (s)); \
  123.    (a) += (b); \
  124.   }
  125. #define GG(a, b, c, d, x, s, ac) \
  126.   {(a) += G ((b), (c), (d)) + (x) + (uint)(ac); \
  127.    (a) = ROTATE_LEFT ((a), (s)); \
  128.    (a) += (b); \
  129.   }
  130. #define HH(a, b, c, d, x, s, ac) \
  131.   {(a) += H ((b), (c), (d)) + (x) + (uint)(ac); \
  132.    (a) = ROTATE_LEFT ((a), (s)); \
  133.    (a) += (b); \
  134.   }
  135. #define II(a, b, c, d, x, s, ac) \
  136.   {(a) += I ((b), (c), (d)) + (x) + (uint)(ac); \
  137.    (a) = ROTATE_LEFT ((a), (s)); \
  138.    (a) += (b); \
  139.   }
  140.  
  141. // Basic MD5 step. Transform buf based on in.
  142. void inline __device__ md5_gpu(const uint *in, uint &a, uint &b, uint &c, uint &d){
  143.         const uint a0 = 0x67452301;
  144.         const uint b0 = 0xEFCDAB89;
  145.         const uint c0 = 0x98BADCFE;
  146.         const uint d0 = 0x10325476;
  147.  
  148.         //Initialize hash value for this chunk:
  149.         a = a0;
  150.         b = b0;
  151.         c = c0;
  152.         d = d0;
  153.  
  154.   /* Round 1 */
  155. #define S11 7
  156. #define S12 12
  157. #define S13 17
  158. #define S14 22
  159.   FF ( a, b, c, d, getw(in,  0), S11, 3614090360); /* 1 */
  160.   FF ( d, a, b, c, getw(in,  1), S12, 3905402710); /* 2 */
  161.   FF ( c, d, a, b, getw(in,  2), S13,  606105819); /* 3 */
  162.   FF ( b, c, d, a, getw(in,  3), S14, 3250441966); /* 4 */
  163.   FF ( a, b, c, d, getw(in,  4), S11, 4118548399); /* 5 */
  164.   FF ( d, a, b, c, getw(in,  5), S12, 1200080426); /* 6 */
  165.   FF ( c, d, a, b, getw(in,  6), S13, 2821735955); /* 7 */
  166.   FF ( b, c, d, a, getw(in,  7), S14, 4249261313); /* 8 */
  167.   FF ( a, b, c, d, getw(in,  8), S11, 1770035416); /* 9 */
  168.   FF ( d, a, b, c, getw(in,  9), S12, 2336552879); /* 10 */
  169.   FF ( c, d, a, b, getw(in, 10), S13, 4294925233); /* 11 */
  170.   FF ( b, c, d, a, getw(in, 11), S14, 2304563134); /* 12 */
  171.   FF ( a, b, c, d, getw(in, 12), S11, 1804603682); /* 13 */
  172.   FF ( d, a, b, c, getw(in, 13), S12, 4254626195); /* 14 */
  173.   FF ( c, d, a, b, getw(in, 14), S13, 2792965006); /* 15 */
  174.   FF ( b, c, d, a, getw(in, 15), S14, 1236535329); /* 16 */
  175.  
  176.   /* Round 2 */
  177. #define S21 5
  178. #define S22 9
  179. #define S23 14
  180. #define S24 20
  181.   GG ( a, b, c, d, getw(in,  1), S21, 4129170786); /* 17 */
  182.   GG ( d, a, b, c, getw(in,  6), S22, 3225465664); /* 18 */
  183.   GG ( c, d, a, b, getw(in, 11), S23,  643717713); /* 19 */
  184.   GG ( b, c, d, a, getw(in,  0), S24, 3921069994); /* 20 */
  185.   GG ( a, b, c, d, getw(in,  5), S21, 3593408605); /* 21 */
  186.   GG ( d, a, b, c, getw(in, 10), S22,   38016083); /* 22 */
  187.   GG ( c, d, a, b, getw(in, 15), S23, 3634488961); /* 23 */
  188.   GG ( b, c, d, a, getw(in,  4), S24, 3889429448); /* 24 */
  189.   GG ( a, b, c, d, getw(in,  9), S21,  568446438); /* 25 */
  190.   GG ( d, a, b, c, getw(in, 14), S22, 3275163606); /* 26 */
  191.   GG ( c, d, a, b, getw(in,  3), S23, 4107603335); /* 27 */
  192.   GG ( b, c, d, a, getw(in,  8), S24, 1163531501); /* 28 */
  193.   GG ( a, b, c, d, getw(in, 13), S21, 2850285829); /* 29 */
  194.   GG ( d, a, b, c, getw(in,  2), S22, 4243563512); /* 30 */
  195.   GG ( c, d, a, b, getw(in,  7), S23, 1735328473); /* 31 */
  196.   GG ( b, c, d, a, getw(in, 12), S24, 2368359562); /* 32 */
  197.  
  198.   /* Round 3 */
  199. #define S31 4
  200. #define S32 11
  201. #define S33 16
  202. #define S34 23
  203.   HH ( a, b, c, d, getw(in,  5), S31, 4294588738); /* 33 */
  204.   HH ( d, a, b, c, getw(in,  8), S32, 2272392833); /* 34 */
  205.   HH ( c, d, a, b, getw(in, 11), S33, 1839030562); /* 35 */
  206.   HH ( b, c, d, a, getw(in, 14), S34, 4259657740); /* 36 */
  207.   HH ( a, b, c, d, getw(in,  1), S31, 2763975236); /* 37 */
  208.   HH ( d, a, b, c, getw(in,  4), S32, 1272893353); /* 38 */
  209.   HH ( c, d, a, b, getw(in,  7), S33, 4139469664); /* 39 */
  210.   HH ( b, c, d, a, getw(in, 10), S34, 3200236656); /* 40 */
  211.   HH ( a, b, c, d, getw(in, 13), S31,  681279174); /* 41 */
  212.   HH ( d, a, b, c, getw(in,  0), S32, 3936430074); /* 42 */
  213.   HH ( c, d, a, b, getw(in,  3), S33, 3572445317); /* 43 */
  214.   HH ( b, c, d, a, getw(in,  6), S34,   76029189); /* 44 */
  215.   HH ( a, b, c, d, getw(in,  9), S31, 3654602809); /* 45 */
  216.   HH ( d, a, b, c, getw(in, 12), S32, 3873151461); /* 46 */
  217.   HH ( c, d, a, b, getw(in, 15), S33,  530742520); /* 47 */
  218.   HH ( b, c, d, a, getw(in,  2), S34, 3299628645); /* 48 */
  219.  
  220.   /* Round 4 */
  221. #define S41 6
  222. #define S42 10
  223. #define S43 15
  224. #define S44 21
  225.   II ( a, b, c, d, getw(in,  0), S41, 4096336452); /* 49 */
  226.   II ( d, a, b, c, getw(in,  7), S42, 1126891415); /* 50 */
  227.   II ( c, d, a, b, getw(in, 14), S43, 2878612391); /* 51 */
  228.   II ( b, c, d, a, getw(in,  5), S44, 4237533241); /* 52 */
  229.   II ( a, b, c, d, getw(in, 12), S41, 1700485571); /* 53 */
  230.   II ( d, a, b, c, getw(in,  3), S42, 2399980690); /* 54 */
  231.   II ( c, d, a, b, getw(in, 10), S43, 4293915773); /* 55 */
  232.   II ( b, c, d, a, getw(in,  1), S44, 2240044497); /* 56 */
  233.   II ( a, b, c, d, getw(in,  8), S41, 1873313359); /* 57 */
  234.   II ( d, a, b, c, getw(in, 15), S42, 4264355552); /* 58 */
  235.   II ( c, d, a, b, getw(in,  6), S43, 2734768916); /* 59 */
  236.   II ( b, c, d, a, getw(in, 13), S44, 1309151649); /* 60 */
  237.   II ( a, b, c, d, getw(in,  4), S41, 4149444226); /* 61 */
  238.   II ( d, a, b, c, getw(in, 11), S42, 3174756917); /* 62 */
  239.   II ( c, d, a, b, getw(in,  2), S43,  718787259); /* 63 */
  240.   II ( b, c, d, a, getw(in,  9), S44, 3951481745); /* 64 */
  241.  
  242.         a += a0;
  243.         b += b0;
  244.         c += c0;
  245.         d += d0;
  246. }
  247.  
  248. // The kernel (this is the entrypoint of GPU code)
  249. // Loads the 64-byte word to be hashed from g   lobal to shared memory and calls the calculation routine
  250. __global__ void md5_calc(/*char *gwords, char *paddedWords3,*/ uint *hash, int realthreads, int msg_size, int size_hash, uint launch){
  251.         int linidx;
  252.  
  253.         // assuming blockDim.y = 1 and threadIdx.y = 0, always
  254.         linidx = threadIdx.x + ((blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x);
  255.  
  256.         // this check slows down the code by ~0.4% (measured)
  257.         if(linidx >= realthreads){
  258.                 return;
  259.         }
  260.  
  261.         uint a;
  262.         uint b;
  263.         uint c;
  264.         uint d;
  265.  
  266.         char msg[64] = {0};
  267.  
  268.         unsigned long long int password;
  269.  
  270.         password = (launch * realthreads) + linidx + 1;
  271.  
  272.         b = 0;
  273.        
  274.         //convert to ASCII
  275.         while(password != 0){
  276.                 msg[b] = password % 10 + 48; //ASCII;;
  277.                 password /= 10;
  278.                 b++;
  279.         }
  280.  
  281.         c = 0;
  282.        
  283.         //invert number order
  284.         while(c < (b / 2)){
  285.                 d = msg[c];
  286.                 msg[c] = msg[b - c - 1];
  287.                 msg[b - c - 1] = d;
  288.        
  289.                 c++;
  290.         }
  291.  
  292.         // load the dictionary word for this thread
  293.         b = 0;
  294.  
  295.         for(a = 0; a < msg_size; a++){
  296.                 //end of the password
  297.                 if(msg[a] == 0){
  298.                         msg[a] = 0x80;
  299.                         b = a;
  300.  
  301.                         break;
  302.                 }
  303.         }
  304.  
  305.         if(!b){
  306.                 msg[msg_size] = 0x80;
  307.        
  308.                 msg[56] = msg_size * 8;
  309.         }
  310.         else{
  311.                 msg[56] = b * 8;
  312.         }
  313.  
  314.         // compute MD5 hash
  315.         md5_gpu((uint *)msg, a, b, c, d);
  316.  
  317.         // return the hash converted to number
  318.         // all this work around is because of different edian order
  319.         if(size_hash == 1){
  320.                 hash[(linidx)] = ((a & 240) >> 4);
  321.         }
  322.         else{
  323.                 if(size_hash == 2){
  324.                         hash[(linidx)] = a & 255;
  325.                 }
  326.                 else{
  327.                         if(size_hash == 3){
  328.                                 hash[(linidx)] = ((a & 61440) >> 12) + ((a & 240) << 4) + ((a & 15) << 4);
  329.                         }
  330.                         else{
  331.                                 if(size_hash == 4){
  332.                                         hash[(linidx)] = ((a & 61440) >> 8) + ((a & 3840) >> 8) + ((a & 240) << 8) + ((a & 15) << 8);
  333.                                 }
  334.                         }
  335.                 }
  336.         }
  337. }
  338.  
  339.  
  340. int main(){
  341.  
  342.   char *gwords = NULL;
  343.   char *paddedWords3 = NULL;
  344.   uint *hash = NULL;
  345.   uint realthreads = 1;
  346.   uint msg_size = 1;
  347.   uint size_hash = 1;
  348.   uint launch = 0;
  349.  
  350.   md5_calc<<<1,1>>>(hash, realthreads, msg_size, size_hash, launch);
  351. }