Advertisement
Guest User

Untitled

a guest
Feb 5th, 2012
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.71 KB | None | 0 0
  1. /*--------------------------------------------*/
  2. // aes_ecbl.cl
  3. /*--------------------------------------------*/
  4.  
  5. // Rijndael round-constant table: starting with 0x01 at index 1, each entry is the previous one doubled in GF(2^8).
  6. // The AES-128 key schedule in aes_ecb reads only RCON[1]..RCON[10].
  7. __constant unsigned char RCON[256] = {
  8.    0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,
  9.    0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
  10.    0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,
  11.    0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
  12.    0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,
  13.    0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,
  14.    0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,
  15.    0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,
  16.    0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,
  17.    0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
  18.    0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
  19.    0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,
  20.    0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,
  21.    0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,
  22.    0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,
  23.    0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d};
  24.  
  25. // Forward S-box: SBOX[b] is the Rijndael byte substitution for value b; aes_ecb indexes it in the key schedule core.
  26. __constant unsigned char SBOX[256] = {
  27.    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
  28.    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
  29.    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
  30.    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
  31.    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
  32.    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
  33.    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
  34.    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
  35.    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
  36.    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
  37.    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
  38.    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
  39.    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
  40.    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
  41.    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
  42.    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
  43. };
  44.  
  45.  
  46. // Computes the AES-128 expanded key (Rijndael key schedule) into expandedKey.
  47. //
  48. //   key          16-byte cipher key
  49. //   expandedKey  output, at least 176 bytes (11 round keys of 16 bytes each)
  50. //
  51. // The schedule is evaluated byte by byte, so the result does not depend on the
  52. // device byte order or on the alignment of either buffer.
  53. //
  54. // NOTE(review): work-item (0,0) of every work-group writes the same global
  55. // buffer. All groups store identical values, but the barrier at the end only
  56. // synchronizes work-items within one work-group.
  57. //
  58. // TODO: assume 128 bit keys, handle keys of remaining length
  59. // TODO: input/output data buffers are not wired in yet; only the key schedule
  60. //       is implemented
  61. __kernel void aes_ecb(__constant unsigned char *key,
  62.               __global unsigned char *expandedKey)
  63. {
  64.   // I - Use Rijndael key schedule for computing the expanded key
  65.  
  66.   // The expanded key is computed only by the 0-th work item of each workgroup
  67.   if (get_local_id(0) == 0 && get_local_id(1) == 0)
  68.   {
  69.     // TODO: unroll loops?
  70.  
  71.     // First 16 bytes are the encryption key
  72.     for (int b = 0; b < 16; b++)
  73.     {
  74.       expandedKey[b] = key[b];
  75.     }
  76.  
  77.     // Generate the remaining 10 round keys, 16 bytes per round
  78.     for (int i = 1; i <= 10; i++)
  79.     {
  80.       // Byte offset of the round key being generated
  81.       const int j = 16 * i;
  82.  
  83.       // Key schedule core on the previous 4 bytes:
  84.       // rotate left by one byte, apply SBOX, xor RCON[i] into the first byte
  85.       const unsigned char t0 = SBOX[expandedKey[j-3]] ^ RCON[i];
  86.       const unsigned char t1 = SBOX[expandedKey[j-2]];
  87.       const unsigned char t2 = SBOX[expandedKey[j-1]];
  88.       const unsigned char t3 = SBOX[expandedKey[j-4]];
  89.  
  90.       // First 4 bytes: core output xor-ed with the 4 bytes 16 bytes before
  91.       expandedKey[j]   = expandedKey[j-16] ^ t0;
  92.       expandedKey[j+1] = expandedKey[j-15] ^ t1;
  93.       expandedKey[j+2] = expandedKey[j-14] ^ t2;
  94.       expandedKey[j+3] = expandedKey[j-13] ^ t3;
  95.  
  96.       // Remaining 12 bytes: byte 4 positions back xor byte 16 positions back
  97.       for (int b = 4; b < 16; b++)
  98.       {
  99.         expandedKey[j+b] = expandedKey[j+b-4] ^ expandedKey[j+b-16];
  100.       }
  101.     }
  102.   }
  103.  
  104.   // Done with key scheduling. The barrier sits outside the conditional so that
  105.   // every work-item of the work-group reaches the same barrier call.
  106.   barrier(CLK_GLOBAL_MEM_FENCE);
  107. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement