/*
 * X11 kernel implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2014 phm
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author phm <phm@inbox.com>
 */

  31. #ifdef __ECLIPSE_EDITOR__
  32. #include "OpenCLKernel.hpp"
  33. #endif
  34.  
  35. #ifndef DARKCOIN_CL
  36. #define DARKCOIN_CL
  37.  
  38. #if __ENDIAN_LITTLE__
  39. #define SPH_LITTLE_ENDIAN 1
  40. #else
  41. #define SPH_BIG_ENDIAN 1
  42. #endif
  43.  
  44. #define SPH_UPTR sph_u64
  45.  
  46. typedef unsigned int sph_u32;
  47. typedef int sph_s32;
  48. #ifndef __OPENCL_VERSION__
  49. typedef unsigned long long sph_u64;
  50. typedef long long sph_s64;
  51. #else
  52. typedef unsigned long sph_u64;
  53. typedef long sph_s64;
  54. #endif
  55.  
  56. #define SPH_64 1
  57. #define SPH_64_TRUE 1
  58.  
  59. #define SPH_C32(x) ((sph_u32)(x ## U))
  60. #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
  61. #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
  62. #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
  63.  
  64. #define SPH_C64(x) ((sph_u64)(x ## UL))
  65. #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
  66. #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
  67. #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
  68.  
  69. #define SPH_ECHO_64 1
  70. #define SPH_KECCAK_64 1
  71. #define SPH_JH_64 1
  72. #define SPH_SIMD_NOCOPY 0
  73. #define SPH_KECCAK_NOCOPY 0
  74. #define SPH_COMPACT_BLAKE_64 0
  75. #define SPH_LUFFA_PARALLEL 0
  76. #ifndef SPH_SMALL_FOOTPRINT_GROESTL
  77. #define SPH_SMALL_FOOTPRINT_GROESTL 0
  78. #endif
  79. #define SPH_GROESTL_BIG_ENDIAN 0
  80.  
  81. #define SPH_CUBEHASH_UNROLL 0
  82. #define SPH_KECCAK_UNROLL 0
  83.  
  84. #ifndef AES_HELPER_H
  85. #define AES_HELPER_H
  86.  
  87. /* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
  88. /*
  89. * AES tables. This file is not meant to be compiled by itself; it
  90. * is included by some hash function implementations. It contains
  91. * the precomputed tables and helper macros for evaluating an AES
  92. * round, optionally with a final XOR with a subkey.
  93. *
  94. * By default, this file defines the tables and macros for little-endian
  95. * processing (i.e. it is assumed that the input bytes have been read
  96. * from memory and assembled with the little-endian convention). If
  97. * the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
  98. * when this file is included, then the tables and macros for big-endian
  99. * processing are defined instead. The big-endian tables and macros have
  100. * names distinct from the little-endian tables and macros, hence it is
  101. * possible to have both simultaneously, by including this file twice
  102. * (with and without the AES_BIG_ENDIAN macro).
  103. *
  104. * ==========================(LICENSE BEGIN)============================
  105. *
  106. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  107. *
  108. * Permission is hereby granted, free of charge, to any person obtaining
  109. * a copy of this software and associated documentation files (the
  110. * "Software"), to deal in the Software without restriction, including
  111. * without limitation the rights to use, copy, modify, merge, publish,
  112. * distribute, sublicense, and/or sell copies of the Software, and to
  113. * permit persons to whom the Software is furnished to do so, subject to
  114. * the following conditions:
  115. *
  116. * The above copyright notice and this permission notice shall be
  117. * included in all copies or substantial portions of the Software.
  118. *
  119. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  120. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  121. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  122. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  123. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  124. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  125. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  126. *
  127. * ===========================(LICENSE END)=============================
  128. *
  129. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  130. */
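/*
 * Illustrative sketch, not part of the original helper: a consumer that
 * needed both table flavours at once (as the note above describes) could
 * include the helper twice, toggling AES_BIG_ENDIAN between inclusions:
 *
 *     #include "aes_helper.c"      // little-endian macros: AES_ROUND_LE, AES_ROUND_NOKEY_LE
 *     #define AES_BIG_ENDIAN 1
 *     #include "aes_helper.c"      // big-endian macros: AES_ROUND_BE, AES_ROUND_NOKEY_BE
 *
 * In this kernel the helper is inlined exactly once, without
 * AES_BIG_ENDIAN, so only the little-endian branch below is generated.
 */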
  131.  
  132. #if AES_BIG_ENDIAN
  133.  
  134. #define AESx(x) ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
  135. | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
  136. | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
  137. | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
  138.  
  139. #define AES0 AES0_BE
  140. #define AES1 AES1_BE
  141. #define AES2 AES2_BE
  142. #define AES3 AES3_BE
  143.  
  144. #define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
  145. (Y0) = AES0[((X0) >> 24) & 0xFF] \
  146. ^ AES1[((X1) >> 16) & 0xFF] \
  147. ^ AES2[((X2) >> 8) & 0xFF] \
  148. ^ AES3[(X3) & 0xFF] ^ (K0); \
  149. (Y1) = AES0[((X1) >> 24) & 0xFF] \
  150. ^ AES1[((X2) >> 16) & 0xFF] \
  151. ^ AES2[((X3) >> 8) & 0xFF] \
  152. ^ AES3[(X0) & 0xFF] ^ (K1); \
  153. (Y2) = AES0[((X2) >> 24) & 0xFF] \
  154. ^ AES1[((X3) >> 16) & 0xFF] \
  155. ^ AES2[((X0) >> 8) & 0xFF] \
  156. ^ AES3[(X1) & 0xFF] ^ (K2); \
  157. (Y3) = AES0[((X3) >> 24) & 0xFF] \
  158. ^ AES1[((X0) >> 16) & 0xFF] \
  159. ^ AES2[((X1) >> 8) & 0xFF] \
  160. ^ AES3[(X2) & 0xFF] ^ (K3); \
  161. } while (0)
  162.  
  163. #define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
  164. AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
  165.  
  166. #else
  167.  
  168. #define AESx(x) SPH_C32(x)
  169. #define AES0 AES0_LE
  170. #define AES1 AES1_LE
  171. #define AES2 AES2_LE
  172. #define AES3 AES3_LE
  173.  
  174. #define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
  175. (Y0) = AES0[(X0) & 0xFF] \
  176. ^ AES1[((X1) >> 8) & 0xFF] \
  177. ^ AES2[((X2) >> 16) & 0xFF] \
  178. ^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
  179. (Y1) = AES0[(X1) & 0xFF] \
  180. ^ AES1[((X2) >> 8) & 0xFF] \
  181. ^ AES2[((X3) >> 16) & 0xFF] \
  182. ^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
  183. (Y2) = AES0[(X2) & 0xFF] \
  184. ^ AES1[((X3) >> 8) & 0xFF] \
  185. ^ AES2[((X0) >> 16) & 0xFF] \
  186. ^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
  187. (Y3) = AES0[(X3) & 0xFF] \
  188. ^ AES1[((X0) >> 8) & 0xFF] \
  189. ^ AES2[((X1) >> 16) & 0xFF] \
  190. ^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
  191. } while (0)
  192.  
  193. #define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
  194. AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
  195.  
  196. #endif
  197.  
  198. /*
  199. * The AES*[] tables allow us to perform a fast evaluation of an AES
  200. * round; table AESi[] combines SubBytes for a byte at row i, and
  201. * MixColumns for the column where that byte goes after ShiftRows.
  202. */
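/*
 * Illustrative sketch, an assumption rather than original kernel code:
 * with the little-endian variant selected above, one keyless AES round
 * over a 128-bit state held as four 32-bit columns would be written
 *
 *     sph_u32 x0, x1, x2, x3;   // input columns
 *     sph_u32 y0, y1, y2, y3;   // output columns
 *     AES_ROUND_NOKEY_LE(x0, x1, x2, x3, y0, y1, y2, y3);
 *
 * Each output word is four table look-ups (one per AES table) XORed
 * together, plus an XOR with the round key, which is zero in the NOKEY
 * form. The AES-based stages of the X11 chain (ECHO, SHAvite-3) build
 * their round functions on these macros.
 */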
  203.  
  204. __constant const sph_u32 AES0_C[256] = {
  205. AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
  206. AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
  207. AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
  208. AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
  209. AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
  210. AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
  211. AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
  212. AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
  213. AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
  214. AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
  215. AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
  216. AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
  217. AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
  218. AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
  219. AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
  220. AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
  221. AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
  222. AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
  223. AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
  224. AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
  225. AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
  226. AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
  227. AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
  228. AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
  229. AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
  230. AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
  231. AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
  232. AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
  233. AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
  234. AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
  235. AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
  236. AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
  237. AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
  238. AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
  239. AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
  240. AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
  241. AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
  242. AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
  243. AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
  244. AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
  245. AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
  246. AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
  247. AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
  248. AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
  249. AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
  250. AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
  251. AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
  252. AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
  253. AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
  254. AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
  255. AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
  256. AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
  257. AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
  258. AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
  259. AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
  260. AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
  261. AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
  262. AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
  263. AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
  264. AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
  265. AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
  266. AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
  267. AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
  268. AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
  269. };
  270.  
  271. __constant const sph_u32 AES1_C[256] = {
  272. AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
  273. AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
  274. AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
  275. AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
  276. AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
  277. AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
  278. AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
  279. AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
  280. AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
  281. AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
  282. AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
  283. AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
  284. AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
  285. AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
  286. AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
  287. AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
  288. AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
  289. AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
  290. AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
  291. AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
  292. AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
  293. AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
  294. AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
  295. AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
  296. AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
  297. AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
  298. AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
  299. AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
  300. AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
  301. AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
  302. AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
  303. AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
  304. AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
  305. AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
  306. AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
  307. AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
  308. AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
  309. AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
  310. AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
  311. AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
  312. AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
  313. AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
  314. AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
  315. AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
  316. AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
  317. AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
  318. AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
  319. AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
  320. AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
  321. AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
  322. AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
  323. AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
  324. AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
  325. AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
  326. AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
  327. AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
  328. AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
  329. AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
  330. AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
  331. AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
  332. AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
  333. AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
  334. AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
  335. AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
  336. };
  337.  
  338. __constant const sph_u32 AES2_C[256] = {
  339. AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
  340. AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
  341. AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
  342. AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
  343. AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
  344. AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
  345. AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
  346. AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
  347. AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
  348. AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
  349. AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
  350. AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
  351. AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
  352. AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
  353. AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
  354. AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
  355. AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
  356. AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
  357. AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
  358. AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
  359. AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
  360. AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
  361. AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
  362. AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
  363. AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
  364. AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
  365. AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
  366. AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
  367. AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
  368. AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
  369. AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
  370. AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
  371. AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
  372. AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
  373. AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
  374. AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
  375. AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
  376. AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
  377. AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
  378. AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
  379. AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
  380. AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
  381. AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
  382. AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
  383. AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
  384. AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
  385. AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
  386. AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
  387. AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
  388. AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
  389. AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
  390. AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
  391. AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
  392. AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
  393. AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
  394. AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
  395. AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
  396. AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
  397. AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
  398. AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
  399. AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
  400. AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
  401. AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
  402. AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
  403. };
  404.  
  405. __constant const sph_u32 AES3_C[256] = {
  406. AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
  407. AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
  408. AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
  409. AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
  410. AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
  411. AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
  412. AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
  413. AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
  414. AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
  415. AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
  416. AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
  417. AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
  418. AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
  419. AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
  420. AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
  421. AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
  422. AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
  423. AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
  424. AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
  425. AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
  426. AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
  427. AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
  428. AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
  429. AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
  430. AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
  431. AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
  432. AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
  433. AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
  434. AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
  435. AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
  436. AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
  437. AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
  438. AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
  439. AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
  440. AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
  441. AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
  442. AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
  443. AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
  444. AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
  445. AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
  446. AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
  447. AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
  448. AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
  449. AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
  450. AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
  451. AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
  452. AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
  453. AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
  454. AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
  455. AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
  456. AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
  457. AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
  458. AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
  459. AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
  460. AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
  461. AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
  462. AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
  463. AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
  464. AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
  465. AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
  466. AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
  467. AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
  468. AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
  469. AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
  470. };
  471.  
  472. #endif
  473. /* $Id: blake.c 252 2011-06-07 17:55:14Z tp $ */
  474. /*
  475. * BLAKE implementation.
  476. *
  477. * ==========================(LICENSE BEGIN)============================
  478. *
  479. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  480. *
  481. * Permission is hereby granted, free of charge, to any person obtaining
  482. * a copy of this software and associated documentation files (the
  483. * "Software"), to deal in the Software without restriction, including
  484. * without limitation the rights to use, copy, modify, merge, publish,
  485. * distribute, sublicense, and/or sell copies of the Software, and to
  486. * permit persons to whom the Software is furnished to do so, subject to
  487. * the following conditions:
  488. *
  489. * The above copyright notice and this permission notice shall be
  490. * included in all copies or substantial portions of the Software.
  491. *
  492. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  493. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  494. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  495. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  496. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  497. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  498. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  499. *
  500. * ===========================(LICENSE END)=============================
  501. *
  502. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  503. */
  504.  
  505. __constant const sph_u64 BLAKE_IV512[8] = {
  506. SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
  507. SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
  508. SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
  509. SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
  510. };
  511.  
  512. #define Z00 0
  513. #define Z01 1
  514. #define Z02 2
  515. #define Z03 3
  516. #define Z04 4
  517. #define Z05 5
  518. #define Z06 6
  519. #define Z07 7
  520. #define Z08 8
  521. #define Z09 9
  522. #define Z0A A
  523. #define Z0B B
  524. #define Z0C C
  525. #define Z0D D
  526. #define Z0E E
  527. #define Z0F F
  528.  
  529. #define Z10 E
  530. #define Z11 A
  531. #define Z12 4
  532. #define Z13 8
  533. #define Z14 9
  534. #define Z15 F
  535. #define Z16 D
  536. #define Z17 6
  537. #define Z18 1
  538. #define Z19 C
  539. #define Z1A 0
  540. #define Z1B 2
  541. #define Z1C B
  542. #define Z1D 7
  543. #define Z1E 5
  544. #define Z1F 3
  545.  
  546. #define Z20 B
  547. #define Z21 8
  548. #define Z22 C
  549. #define Z23 0
  550. #define Z24 5
  551. #define Z25 2
  552. #define Z26 F
  553. #define Z27 D
  554. #define Z28 A
  555. #define Z29 E
  556. #define Z2A 3
  557. #define Z2B 6
  558. #define Z2C 7
  559. #define Z2D 1
  560. #define Z2E 9
  561. #define Z2F 4
  562.  
  563. #define Z30 7
  564. #define Z31 9
  565. #define Z32 3
  566. #define Z33 1
  567. #define Z34 D
  568. #define Z35 C
  569. #define Z36 B
  570. #define Z37 E
  571. #define Z38 2
  572. #define Z39 6
  573. #define Z3A 5
  574. #define Z3B A
  575. #define Z3C 4
  576. #define Z3D 0
  577. #define Z3E F
  578. #define Z3F 8
  579.  
  580. #define Z40 9
  581. #define Z41 0
  582. #define Z42 5
  583. #define Z43 7
  584. #define Z44 2
  585. #define Z45 4
  586. #define Z46 A
  587. #define Z47 F
  588. #define Z48 E
  589. #define Z49 1
  590. #define Z4A B
  591. #define Z4B C
  592. #define Z4C 6
  593. #define Z4D 8
  594. #define Z4E 3
  595. #define Z4F D
  596.  
  597. #define Z50 2
  598. #define Z51 C
  599. #define Z52 6
  600. #define Z53 A
  601. #define Z54 0
  602. #define Z55 B
  603. #define Z56 8
  604. #define Z57 3
  605. #define Z58 4
  606. #define Z59 D
  607. #define Z5A 7
  608. #define Z5B 5
  609. #define Z5C F
  610. #define Z5D E
  611. #define Z5E 1
  612. #define Z5F 9
  613.  
  614. #define Z60 C
  615. #define Z61 5
  616. #define Z62 1
  617. #define Z63 F
  618. #define Z64 E
  619. #define Z65 D
  620. #define Z66 4
  621. #define Z67 A
  622. #define Z68 0
  623. #define Z69 7
  624. #define Z6A 6
  625. #define Z6B 3
  626. #define Z6C 9
  627. #define Z6D 2
  628. #define Z6E 8
  629. #define Z6F B
  630.  
  631. #define Z70 D
  632. #define Z71 B
  633. #define Z72 7
  634. #define Z73 E
  635. #define Z74 C
  636. #define Z75 1
  637. #define Z76 3
  638. #define Z77 9
  639. #define Z78 5
  640. #define Z79 0
  641. #define Z7A F
  642. #define Z7B 4
  643. #define Z7C 8
  644. #define Z7D 6
  645. #define Z7E 2
  646. #define Z7F A
  647.  
  648. #define Z80 6
  649. #define Z81 F
  650. #define Z82 E
  651. #define Z83 9
  652. #define Z84 B
  653. #define Z85 3
  654. #define Z86 0
  655. #define Z87 8
  656. #define Z88 C
  657. #define Z89 2
  658. #define Z8A D
  659. #define Z8B 7
  660. #define Z8C 1
  661. #define Z8D 4
  662. #define Z8E A
  663. #define Z8F 5
  664.  
  665. #define Z90 A
  666. #define Z91 2
  667. #define Z92 8
  668. #define Z93 4
  669. #define Z94 7
  670. #define Z95 6
  671. #define Z96 1
  672. #define Z97 5
  673. #define Z98 F
  674. #define Z99 B
  675. #define Z9A 9
  676. #define Z9B E
  677. #define Z9C 3
  678. #define Z9D C
  679. #define Z9E D
  680. #define Z9F 0
  681.  
  682. #define Mx(r, i) Mx_(Z ## r ## i)
  683. #define Mx_(n) Mx__(n)
  684. #define Mx__(n) M ## n
  685.  
  686. #define CSx(r, i) CSx_(Z ## r ## i)
  687. #define CSx_(n) CSx__(n)
  688. #define CSx__(n) CS ## n
  689.  
  690. #define CS0 SPH_C32(0x243F6A88)
  691. #define CS1 SPH_C32(0x85A308D3)
  692. #define CS2 SPH_C32(0x13198A2E)
  693. #define CS3 SPH_C32(0x03707344)
  694. #define CS4 SPH_C32(0xA4093822)
  695. #define CS5 SPH_C32(0x299F31D0)
  696. #define CS6 SPH_C32(0x082EFA98)
  697. #define CS7 SPH_C32(0xEC4E6C89)
  698. #define CS8 SPH_C32(0x452821E6)
  699. #define CS9 SPH_C32(0x38D01377)
  700. #define CSA SPH_C32(0xBE5466CF)
  701. #define CSB SPH_C32(0x34E90C6C)
  702. #define CSC SPH_C32(0xC0AC29B7)
  703. #define CSD SPH_C32(0xC97C50DD)
  704. #define CSE SPH_C32(0x3F84D5B5)
  705. #define CSF SPH_C32(0xB5470917)
  706.  
  707. #if SPH_64
  708.  
  709. #define CBx(r, i) CBx_(Z ## r ## i)
  710. #define CBx_(n) CBx__(n)
  711. #define CBx__(n) CB ## n
  712.  
  713. #define CB0 SPH_C64(0x243F6A8885A308D3)
  714. #define CB1 SPH_C64(0x13198A2E03707344)
  715. #define CB2 SPH_C64(0xA4093822299F31D0)
  716. #define CB3 SPH_C64(0x082EFA98EC4E6C89)
  717. #define CB4 SPH_C64(0x452821E638D01377)
  718. #define CB5 SPH_C64(0xBE5466CF34E90C6C)
  719. #define CB6 SPH_C64(0xC0AC29B7C97C50DD)
  720. #define CB7 SPH_C64(0x3F84D5B5B5470917)
  721. #define CB8 SPH_C64(0x9216D5D98979FB1B)
  722. #define CB9 SPH_C64(0xD1310BA698DFB5AC)
  723. #define CBA SPH_C64(0x2FFD72DBD01ADFB7)
  724. #define CBB SPH_C64(0xB8E1AFED6A267E96)
  725. #define CBC SPH_C64(0xBA7C9045F12C7F99)
  726. #define CBD SPH_C64(0x24A19947B3916CF7)
  727. #define CBE SPH_C64(0x0801F2E2858EFC16)
  728. #define CBF SPH_C64(0x636920D871574E69)
  729.  
  730. #endif
  731.  
  732. #if SPH_64
  733.  
  734. #define GB(m0, m1, c0, c1, a, b, c, d) do { \
  735. a = SPH_T64(a + b + (m0 ^ c1)); \
  736. d = SPH_ROTR64(d ^ a, 32); \
  737. c = SPH_T64(c + d); \
  738. b = SPH_ROTR64(b ^ c, 25); \
  739. a = SPH_T64(a + b + (m1 ^ c0)); \
  740. d = SPH_ROTR64(d ^ a, 16); \
  741. c = SPH_T64(c + d); \
  742. b = SPH_ROTR64(b ^ c, 11); \
  743. } while (0)
  744.  
  745. #define ROUND_B(r) do { \
  746. GB(Mx(r, 0), Mx(r, 1), CBx(r, 0), CBx(r, 1), V0, V4, V8, VC); \
  747. GB(Mx(r, 2), Mx(r, 3), CBx(r, 2), CBx(r, 3), V1, V5, V9, VD); \
  748. GB(Mx(r, 4), Mx(r, 5), CBx(r, 4), CBx(r, 5), V2, V6, VA, VE); \
  749. GB(Mx(r, 6), Mx(r, 7), CBx(r, 6), CBx(r, 7), V3, V7, VB, VF); \
  750. GB(Mx(r, 8), Mx(r, 9), CBx(r, 8), CBx(r, 9), V0, V5, VA, VF); \
  751. GB(Mx(r, A), Mx(r, B), CBx(r, A), CBx(r, B), V1, V6, VB, VC); \
  752. GB(Mx(r, C), Mx(r, D), CBx(r, C), CBx(r, D), V2, V7, V8, VD); \
  753. GB(Mx(r, E), Mx(r, F), CBx(r, E), CBx(r, F), V3, V4, V9, VE); \
  754. } while (0)
  755.  
  756. #endif
  757.  
  758. #if SPH_64
  759.  
  760. #define BLAKE_DECL_STATE64 \
  761. sph_u64 H0, H1, H2, H3, H4, H5, H6, H7; \
  762. sph_u64 S0, S1, S2, S3, T0, T1;
  763.  
  764. #define BLAKE_READ_STATE64(state) do { \
  765. H0 = (state)->H[0]; \
  766. H1 = (state)->H[1]; \
  767. H2 = (state)->H[2]; \
  768. H3 = (state)->H[3]; \
  769. H4 = (state)->H[4]; \
  770. H5 = (state)->H[5]; \
  771. H6 = (state)->H[6]; \
  772. H7 = (state)->H[7]; \
  773. S0 = (state)->S[0]; \
  774. S1 = (state)->S[1]; \
  775. S2 = (state)->S[2]; \
  776. S3 = (state)->S[3]; \
  777. T0 = (state)->T0; \
  778. T1 = (state)->T1; \
  779. } while (0)
  780.  
  781. #define BLAKE_WRITE_STATE64(state) do { \
  782. (state)->H[0] = H0; \
  783. (state)->H[1] = H1; \
  784. (state)->H[2] = H2; \
  785. (state)->H[3] = H3; \
  786. (state)->H[4] = H4; \
  787. (state)->H[5] = H5; \
  788. (state)->H[6] = H6; \
  789. (state)->H[7] = H7; \
  790. (state)->S[0] = S0; \
  791. (state)->S[1] = S1; \
  792. (state)->S[2] = S2; \
  793. (state)->S[3] = S3; \
  794. (state)->T0 = T0; \
  795. (state)->T1 = T1; \
  796. } while (0)
  797.  
  798. #define COMPRESS64 do { \
  799. V0 = H0; \
  800. V1 = H1; \
  801. V2 = H2; \
  802. V3 = H3; \
  803. V4 = H4; \
  804. V5 = H5; \
  805. V6 = H6; \
  806. V7 = H7; \
  807. V8 = S0 ^ CB0; \
  808. V9 = S1 ^ CB1; \
  809. VA = S2 ^ CB2; \
  810. VB = S3 ^ CB3; \
  811. VC = T0 ^ CB4; \
  812. VD = T0 ^ CB5; \
  813. VE = T1 ^ CB6; \
  814. VF = T1 ^ CB7; \
  815. ROUND_B(0); \
  816. ROUND_B(1); \
  817. ROUND_B(2); \
  818. ROUND_B(3); \
  819. ROUND_B(4); \
  820. ROUND_B(5); \
  821. ROUND_B(6); \
  822. ROUND_B(7); \
  823. ROUND_B(8); \
  824. ROUND_B(9); \
  825. ROUND_B(0); \
  826. ROUND_B(1); \
  827. ROUND_B(2); \
  828. ROUND_B(3); \
  829. ROUND_B(4); \
  830. ROUND_B(5); \
  831. H0 ^= S0 ^ V0 ^ V8; \
  832. H1 ^= S1 ^ V1 ^ V9; \
  833. H2 ^= S2 ^ V2 ^ VA; \
  834. H3 ^= S3 ^ V3 ^ VB; \
  835. H4 ^= S0 ^ V4 ^ VC; \
  836. H5 ^= S1 ^ V5 ^ VD; \
  837. H6 ^= S2 ^ V6 ^ VE; \
  838. H7 ^= S3 ^ V7 ^ VF; \
  839. } while (0)
  840.  
  841. #endif
  842.  
  843. __constant const sph_u64 salt_zero_big[4] = { 0, 0, 0, 0 };
  844.  
  845. #ifdef __cplusplus
  846. }
  847. #endif
  848. /* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
  849. /*
  850. * BMW implementation.
  851. *
  852. * ==========================(LICENSE BEGIN)============================
  853. *
  854. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  855. *
  856. * Permission is hereby granted, free of charge, to any person obtaining
  857. * a copy of this software and associated documentation files (the
  858. * "Software"), to deal in the Software without restriction, including
  859. * without limitation the rights to use, copy, modify, merge, publish,
  860. * distribute, sublicense, and/or sell copies of the Software, and to
  861. * permit persons to whom the Software is furnished to do so, subject to
  862. * the following conditions:
  863. *
  864. * The above copyright notice and this permission notice shall be
  865. * included in all copies or substantial portions of the Software.
  866. *
  867. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  868. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  869. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  870. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  871. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  872. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  873. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  874. *
  875. * ===========================(LICENSE END)=============================
  876. *
  877. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  878. */
  879.  
  880. __constant const sph_u64 BMW_IV512[] = {
  881. SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
  882. SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
  883. SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
  884. SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
  885. SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
  886. SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
  887. SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
  888. SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
  889. };
  890.  
  891. #define XCAT(x, y) XCAT_(x, y)
  892. #define XCAT_(x, y) x ## y
  893.  
  894. #define LPAR (
  895.  
  896. #define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  897. #define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
  898. #define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
  899. #define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
  900. #define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
  901. #define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
  902. #define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
  903. #define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
  904. #define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
  905. #define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
  906. #define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
  907. #define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
  908. #define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
  909. #define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
  910. #define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
  911. #define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
  912.  
  913. #define M16_16 0, 1, 3, 4, 7, 10, 11
  914. #define M16_17 1, 2, 4, 5, 8, 11, 12
  915. #define M16_18 2, 3, 5, 6, 9, 12, 13
  916. #define M16_19 3, 4, 6, 7, 10, 13, 14
  917. #define M16_20 4, 5, 7, 8, 11, 14, 15
  918. #define M16_21 5, 6, 8, 9, 12, 15, 16
  919. #define M16_22 6, 7, 9, 10, 13, 0, 1
  920. #define M16_23 7, 8, 10, 11, 14, 1, 2
  921. #define M16_24 8, 9, 11, 12, 15, 2, 3
  922. #define M16_25 9, 10, 12, 13, 0, 3, 4
  923. #define M16_26 10, 11, 13, 14, 1, 4, 5
  924. #define M16_27 11, 12, 14, 15, 2, 5, 6
  925. #define M16_28 12, 13, 15, 16, 3, 6, 7
  926. #define M16_29 13, 14, 0, 1, 4, 7, 8
  927. #define M16_30 14, 15, 1, 2, 5, 8, 9
  928. #define M16_31 15, 16, 2, 3, 6, 9, 10
  929.  
  930. #define ss0(x) (((x) >> 1) ^ SPH_T32((x) << 3) \
  931. ^ SPH_ROTL32(x, 4) ^ SPH_ROTL32(x, 19))
  932. #define ss1(x) (((x) >> 1) ^ SPH_T32((x) << 2) \
  933. ^ SPH_ROTL32(x, 8) ^ SPH_ROTL32(x, 23))
  934. #define ss2(x) (((x) >> 2) ^ SPH_T32((x) << 1) \
  935. ^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
  936. #define ss3(x) (((x) >> 2) ^ SPH_T32((x) << 2) \
  937. ^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
  938. #define ss4(x) (((x) >> 1) ^ (x))
  939. #define ss5(x) (((x) >> 2) ^ (x))
  940. #define rs1(x) SPH_ROTL32(x, 3)
  941. #define rs2(x) SPH_ROTL32(x, 7)
  942. #define rs3(x) SPH_ROTL32(x, 13)
  943. #define rs4(x) SPH_ROTL32(x, 16)
  944. #define rs5(x) SPH_ROTL32(x, 19)
  945. #define rs6(x) SPH_ROTL32(x, 23)
  946. #define rs7(x) SPH_ROTL32(x, 27)
  947.  
  948. #define Ks(j) SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
  949.  
  950. #define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
  951. (SPH_T32(SPH_ROTL32(mf(j0m), j1m) + SPH_ROTL32(mf(j3m), j4m) \
  952. - SPH_ROTL32(mf(j10m), j11m) + Ks(j16)) ^ hf(j7m))
  953.  
  954. #define expand1s_inner(qf, mf, hf, i16, \
  955. i0, i1, i2, i3, i4, i5, i6, i7, i8, \
  956. i9, i10, i11, i12, i13, i14, i15, \
  957. i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
  958. SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) + ss3(qf(i2)) + ss0(qf(i3)) \
  959. + ss1(qf(i4)) + ss2(qf(i5)) + ss3(qf(i6)) + ss0(qf(i7)) \
  960. + ss1(qf(i8)) + ss2(qf(i9)) + ss3(qf(i10)) + ss0(qf(i11)) \
  961. + ss1(qf(i12)) + ss2(qf(i13)) + ss3(qf(i14)) + ss0(qf(i15)) \
  962. + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
  963.  
  964. #define expand1s(qf, mf, hf, i16) \
  965. expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
  966. #define expand1s_(qf, mf, hf, i16, ix, iy) \
  967. expand1s_inner LPAR qf, mf, hf, i16, ix, iy)
  968.  
  969. #define expand2s_inner(qf, mf, hf, i16, \
  970. i0, i1, i2, i3, i4, i5, i6, i7, i8, \
  971. i9, i10, i11, i12, i13, i14, i15, \
  972. i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
  973. SPH_T32(qf(i0) + rs1(qf(i1)) + qf(i2) + rs2(qf(i3)) \
  974. + qf(i4) + rs3(qf(i5)) + qf(i6) + rs4(qf(i7)) \
  975. + qf(i8) + rs5(qf(i9)) + qf(i10) + rs6(qf(i11)) \
  976. + qf(i12) + rs7(qf(i13)) + ss4(qf(i14)) + ss5(qf(i15)) \
  977. + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
  978.  
  979. #define expand2s(qf, mf, hf, i16) \
  980. expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
  981. #define expand2s_(qf, mf, hf, i16, ix, iy) \
  982. expand2s_inner LPAR qf, mf, hf, i16, ix, iy)
  983.  
  984. #if SPH_64
  985.  
  986. #define sb0(x) (((x) >> 1) ^ SPH_T64((x) << 3) \
  987. ^ SPH_ROTL64(x, 4) ^ SPH_ROTL64(x, 37))
  988. #define sb1(x) (((x) >> 1) ^ SPH_T64((x) << 2) \
  989. ^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
  990. #define sb2(x) (((x) >> 2) ^ SPH_T64((x) << 1) \
  991. ^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
  992. #define sb3(x) (((x) >> 2) ^ SPH_T64((x) << 2) \
  993. ^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
  994. #define sb4(x) (((x) >> 1) ^ (x))
  995. #define sb5(x) (((x) >> 2) ^ (x))
  996. #define rb1(x) SPH_ROTL64(x, 5)
  997. #define rb2(x) SPH_ROTL64(x, 11)
  998. #define rb3(x) SPH_ROTL64(x, 27)
  999. #define rb4(x) SPH_ROTL64(x, 32)
  1000. #define rb5(x) SPH_ROTL64(x, 37)
  1001. #define rb6(x) SPH_ROTL64(x, 43)
  1002. #define rb7(x) SPH_ROTL64(x, 53)
  1003.  
  1004. #define Kb(j) SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
  1005.  
  1006. #define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
  1007. (SPH_T64(SPH_ROTL64(mf(j0m), j1m) + SPH_ROTL64(mf(j3m), j4m) \
  1008. - SPH_ROTL64(mf(j10m), j11m) + Kb(j16)) ^ hf(j7m))
  1009.  
  1010. #define expand1b_inner(qf, mf, hf, i16, \
  1011. i0, i1, i2, i3, i4, i5, i6, i7, i8, \
  1012. i9, i10, i11, i12, i13, i14, i15, \
  1013. i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
  1014. SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) + sb3(qf(i2)) + sb0(qf(i3)) \
  1015. + sb1(qf(i4)) + sb2(qf(i5)) + sb3(qf(i6)) + sb0(qf(i7)) \
  1016. + sb1(qf(i8)) + sb2(qf(i9)) + sb3(qf(i10)) + sb0(qf(i11)) \
  1017. + sb1(qf(i12)) + sb2(qf(i13)) + sb3(qf(i14)) + sb0(qf(i15)) \
  1018. + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
  1019.  
  1020. #define expand1b(qf, mf, hf, i16) \
  1021. expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
  1022. #define expand1b_(qf, mf, hf, i16, ix, iy) \
  1023. expand1b_inner LPAR qf, mf, hf, i16, ix, iy)
  1024.  
  1025. #define expand2b_inner(qf, mf, hf, i16, \
  1026. i0, i1, i2, i3, i4, i5, i6, i7, i8, \
  1027. i9, i10, i11, i12, i13, i14, i15, \
  1028. i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
  1029. SPH_T64(qf(i0) + rb1(qf(i1)) + qf(i2) + rb2(qf(i3)) \
  1030. + qf(i4) + rb3(qf(i5)) + qf(i6) + rb4(qf(i7)) \
  1031. + qf(i8) + rb5(qf(i9)) + qf(i10) + rb6(qf(i11)) \
  1032. + qf(i12) + rb7(qf(i13)) + sb4(qf(i14)) + sb5(qf(i15)) \
  1033. + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))
  1034.  
  1035. #define expand2b(qf, mf, hf, i16) \
  1036. expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
  1037. #define expand2b_(qf, mf, hf, i16, ix, iy) \
  1038. expand2b_inner LPAR qf, mf, hf, i16, ix, iy)
  1039.  
  1040. #endif
  1041.  
  1042. #define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
  1043. tt((M(i0) ^ H(i0)) op01 (M(i1) ^ H(i1)) op12 (M(i2) ^ H(i2)) \
  1044. op23 (M(i3) ^ H(i3)) op34 (M(i4) ^ H(i4)))
  1045.  
  1046. #define Ws0 MAKE_W(SPH_T32, 5, -, 7, +, 10, +, 13, +, 14)
  1047. #define Ws1 MAKE_W(SPH_T32, 6, -, 8, +, 11, +, 14, -, 15)
  1048. #define Ws2 MAKE_W(SPH_T32, 0, +, 7, +, 9, -, 12, +, 15)
  1049. #define Ws3 MAKE_W(SPH_T32, 0, -, 1, +, 8, -, 10, +, 13)
  1050. #define Ws4 MAKE_W(SPH_T32, 1, +, 2, +, 9, -, 11, -, 14)
  1051. #define Ws5 MAKE_W(SPH_T32, 3, -, 2, +, 10, -, 12, +, 15)
  1052. #define Ws6 MAKE_W(SPH_T32, 4, -, 0, -, 3, -, 11, +, 13)
  1053. #define Ws7 MAKE_W(SPH_T32, 1, -, 4, -, 5, -, 12, -, 14)
  1054. #define Ws8 MAKE_W(SPH_T32, 2, -, 5, -, 6, +, 13, -, 15)
  1055. #define Ws9 MAKE_W(SPH_T32, 0, -, 3, +, 6, -, 7, +, 14)
  1056. #define Ws10 MAKE_W(SPH_T32, 8, -, 1, -, 4, -, 7, +, 15)
  1057. #define Ws11 MAKE_W(SPH_T32, 8, -, 0, -, 2, -, 5, +, 9)
  1058. #define Ws12 MAKE_W(SPH_T32, 1, +, 3, -, 6, -, 9, +, 10)
  1059. #define Ws13 MAKE_W(SPH_T32, 2, +, 4, +, 7, +, 10, +, 11)
  1060. #define Ws14 MAKE_W(SPH_T32, 3, -, 5, +, 8, -, 11, -, 12)
  1061. #define Ws15 MAKE_W(SPH_T32, 12, -, 4, -, 6, -, 9, +, 13)
  1062.  
  1063. #define MAKE_Qas do { \
  1064. qt[ 0] = SPH_T32(ss0(Ws0 ) + H( 1)); \
  1065. qt[ 1] = SPH_T32(ss1(Ws1 ) + H( 2)); \
  1066. qt[ 2] = SPH_T32(ss2(Ws2 ) + H( 3)); \
  1067. qt[ 3] = SPH_T32(ss3(Ws3 ) + H( 4)); \
  1068. qt[ 4] = SPH_T32(ss4(Ws4 ) + H( 5)); \
  1069. qt[ 5] = SPH_T32(ss0(Ws5 ) + H( 6)); \
  1070. qt[ 6] = SPH_T32(ss1(Ws6 ) + H( 7)); \
  1071. qt[ 7] = SPH_T32(ss2(Ws7 ) + H( 8)); \
  1072. qt[ 8] = SPH_T32(ss3(Ws8 ) + H( 9)); \
  1073. qt[ 9] = SPH_T32(ss4(Ws9 ) + H(10)); \
  1074. qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
  1075. qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
  1076. qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
  1077. qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
  1078. qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
  1079. qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
  1080. } while (0)
  1081.  
  1082. #define MAKE_Qbs do { \
  1083. qt[16] = expand1s(Qs, M, H, 16); \
  1084. qt[17] = expand1s(Qs, M, H, 17); \
  1085. qt[18] = expand2s(Qs, M, H, 18); \
  1086. qt[19] = expand2s(Qs, M, H, 19); \
  1087. qt[20] = expand2s(Qs, M, H, 20); \
  1088. qt[21] = expand2s(Qs, M, H, 21); \
  1089. qt[22] = expand2s(Qs, M, H, 22); \
  1090. qt[23] = expand2s(Qs, M, H, 23); \
  1091. qt[24] = expand2s(Qs, M, H, 24); \
  1092. qt[25] = expand2s(Qs, M, H, 25); \
  1093. qt[26] = expand2s(Qs, M, H, 26); \
  1094. qt[27] = expand2s(Qs, M, H, 27); \
  1095. qt[28] = expand2s(Qs, M, H, 28); \
  1096. qt[29] = expand2s(Qs, M, H, 29); \
  1097. qt[30] = expand2s(Qs, M, H, 30); \
  1098. qt[31] = expand2s(Qs, M, H, 31); \
  1099. } while (0)
  1100.  
  1101. #define MAKE_Qs do { \
  1102. MAKE_Qas; \
  1103. MAKE_Qbs; \
  1104. } while (0)
  1105.  
  1106. #define Qs(j) (qt[j])
  1107.  
  1108. #if SPH_64
  1109.  
  1110. #define Wb0 MAKE_W(SPH_T64, 5, -, 7, +, 10, +, 13, +, 14)
  1111. #define Wb1 MAKE_W(SPH_T64, 6, -, 8, +, 11, +, 14, -, 15)
  1112. #define Wb2 MAKE_W(SPH_T64, 0, +, 7, +, 9, -, 12, +, 15)
  1113. #define Wb3 MAKE_W(SPH_T64, 0, -, 1, +, 8, -, 10, +, 13)
  1114. #define Wb4 MAKE_W(SPH_T64, 1, +, 2, +, 9, -, 11, -, 14)
  1115. #define Wb5 MAKE_W(SPH_T64, 3, -, 2, +, 10, -, 12, +, 15)
  1116. #define Wb6 MAKE_W(SPH_T64, 4, -, 0, -, 3, -, 11, +, 13)
  1117. #define Wb7 MAKE_W(SPH_T64, 1, -, 4, -, 5, -, 12, -, 14)
  1118. #define Wb8 MAKE_W(SPH_T64, 2, -, 5, -, 6, +, 13, -, 15)
  1119. #define Wb9 MAKE_W(SPH_T64, 0, -, 3, +, 6, -, 7, +, 14)
  1120. #define Wb10 MAKE_W(SPH_T64, 8, -, 1, -, 4, -, 7, +, 15)
  1121. #define Wb11 MAKE_W(SPH_T64, 8, -, 0, -, 2, -, 5, +, 9)
  1122. #define Wb12 MAKE_W(SPH_T64, 1, +, 3, -, 6, -, 9, +, 10)
  1123. #define Wb13 MAKE_W(SPH_T64, 2, +, 4, +, 7, +, 10, +, 11)
  1124. #define Wb14 MAKE_W(SPH_T64, 3, -, 5, +, 8, -, 11, -, 12)
  1125. #define Wb15 MAKE_W(SPH_T64, 12, -, 4, -, 6, -, 9, +, 13)
  1126.  
  1127. #define MAKE_Qab do { \
  1128. qt[ 0] = SPH_T64(sb0(Wb0 ) + H( 1)); \
  1129. qt[ 1] = SPH_T64(sb1(Wb1 ) + H( 2)); \
  1130. qt[ 2] = SPH_T64(sb2(Wb2 ) + H( 3)); \
  1131. qt[ 3] = SPH_T64(sb3(Wb3 ) + H( 4)); \
  1132. qt[ 4] = SPH_T64(sb4(Wb4 ) + H( 5)); \
  1133. qt[ 5] = SPH_T64(sb0(Wb5 ) + H( 6)); \
  1134. qt[ 6] = SPH_T64(sb1(Wb6 ) + H( 7)); \
  1135. qt[ 7] = SPH_T64(sb2(Wb7 ) + H( 8)); \
  1136. qt[ 8] = SPH_T64(sb3(Wb8 ) + H( 9)); \
  1137. qt[ 9] = SPH_T64(sb4(Wb9 ) + H(10)); \
  1138. qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
  1139. qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
  1140. qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
  1141. qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
  1142. qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
  1143. qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
  1144. } while (0)
  1145.  
  1146. #define MAKE_Qbb do { \
  1147. qt[16] = expand1b(Qb, M, H, 16); \
  1148. qt[17] = expand1b(Qb, M, H, 17); \
  1149. qt[18] = expand2b(Qb, M, H, 18); \
  1150. qt[19] = expand2b(Qb, M, H, 19); \
  1151. qt[20] = expand2b(Qb, M, H, 20); \
  1152. qt[21] = expand2b(Qb, M, H, 21); \
  1153. qt[22] = expand2b(Qb, M, H, 22); \
  1154. qt[23] = expand2b(Qb, M, H, 23); \
  1155. qt[24] = expand2b(Qb, M, H, 24); \
  1156. qt[25] = expand2b(Qb, M, H, 25); \
  1157. qt[26] = expand2b(Qb, M, H, 26); \
  1158. qt[27] = expand2b(Qb, M, H, 27); \
  1159. qt[28] = expand2b(Qb, M, H, 28); \
  1160. qt[29] = expand2b(Qb, M, H, 29); \
  1161. qt[30] = expand2b(Qb, M, H, 30); \
  1162. qt[31] = expand2b(Qb, M, H, 31); \
  1163. } while (0)
  1164.  
  1165. #define MAKE_Qb do { \
  1166. MAKE_Qab; \
  1167. MAKE_Qbb; \
  1168. } while (0)
  1169.  
  1170. #define Qb(j) (qt[j])
  1171.  
  1172. #endif
  1173.  
  1174. #define FOLD(type, mkQ, tt, rol, mf, qf, dhf) do { \
  1175. type qt[32], xl, xh; \
  1176. mkQ; \
  1177. xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
  1178. ^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
  1179. xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
  1180. ^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
  1181. dhf( 0) = tt(((xh << 5) ^ (qf(16) >> 5) ^ mf( 0)) \
  1182. + (xl ^ qf(24) ^ qf( 0))); \
  1183. dhf( 1) = tt(((xh >> 7) ^ (qf(17) << 8) ^ mf( 1)) \
  1184. + (xl ^ qf(25) ^ qf( 1))); \
  1185. dhf( 2) = tt(((xh >> 5) ^ (qf(18) << 5) ^ mf( 2)) \
  1186. + (xl ^ qf(26) ^ qf( 2))); \
  1187. dhf( 3) = tt(((xh >> 1) ^ (qf(19) << 5) ^ mf( 3)) \
  1188. + (xl ^ qf(27) ^ qf( 3))); \
  1189. dhf( 4) = tt(((xh >> 3) ^ (qf(20) << 0) ^ mf( 4)) \
  1190. + (xl ^ qf(28) ^ qf( 4))); \
  1191. dhf( 5) = tt(((xh << 6) ^ (qf(21) >> 6) ^ mf( 5)) \
  1192. + (xl ^ qf(29) ^ qf( 5))); \
  1193. dhf( 6) = tt(((xh >> 4) ^ (qf(22) << 6) ^ mf( 6)) \
  1194. + (xl ^ qf(30) ^ qf( 6))); \
  1195. dhf( 7) = tt(((xh >> 11) ^ (qf(23) << 2) ^ mf( 7)) \
  1196. + (xl ^ qf(31) ^ qf( 7))); \
  1197. dhf( 8) = tt(rol(dhf(4), 9) + (xh ^ qf(24) ^ mf( 8)) \
  1198. + ((xl << 8) ^ qf(23) ^ qf( 8))); \
  1199. dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
  1200. + ((xl >> 6) ^ qf(16) ^ qf( 9))); \
  1201. dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
  1202. + ((xl << 6) ^ qf(17) ^ qf(10))); \
  1203. dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
  1204. + ((xl << 4) ^ qf(18) ^ qf(11))); \
  1205. dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
  1206. + ((xl >> 3) ^ qf(19) ^ qf(12))); \
  1207. dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
  1208. + ((xl >> 4) ^ qf(20) ^ qf(13))); \
  1209. dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
  1210. + ((xl >> 7) ^ qf(21) ^ qf(14))); \
  1211. dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
  1212. + ((xl >> 2) ^ qf(22) ^ qf(15))); \
  1213. } while (0)
  1214.  
  1215. #define FOLDb FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
  1216.  
  1217. __constant const sph_u64 final_b[16] = {
  1218. SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
  1219. SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
  1220. SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
  1221. SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
  1222. SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
  1223. SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
  1224. SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
  1225. SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
  1226. };
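/*
 * Note on final_b (descriptive, not from the original source): entry i is
 * simply 0xaaaaaaaaaaaaaaa0 + i. It is the constant chaining value of the
 * BMW-512 final transform: after the last data block has been compressed,
 * the 16-word intermediate state is compressed once more as if it were a
 * message, with final_b standing in for H(0..15), and the digest is read
 * from the upper half of that result.
 */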
  1227.  
  1228. /* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */
  1229. /*
  1230. * Groestl implementation.
  1231. *
  1232. * ==========================(LICENSE BEGIN)============================
  1233. *
  1234. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  1235. *
  1236. * Permission is hereby granted, free of charge, to any person obtaining
  1237. * a copy of this software and associated documentation files (the
  1238. * "Software"), to deal in the Software without restriction, including
  1239. * without limitation the rights to use, copy, modify, merge, publish,
  1240. * distribute, sublicense, and/or sell copies of the Software, and to
  1241. * permit persons to whom the Software is furnished to do so, subject to
  1242. * the following conditions:
  1243. *
  1244. * The above copyright notice and this permission notice shall be
  1245. * included in all copies or substantial portions of the Software.
  1246. *
  1247. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  1248. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  1249. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  1250. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  1251. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  1252. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  1253. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  1254. *
  1255. * ===========================(LICENSE END)=============================
  1256. *
  1257. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  1258. */
  1259.  
  1260. /*
  1261. * Apparently, the 32-bit-only version is not faster than the 64-bit
  1262. * version unless using the "small footprint" code on a 32-bit machine.
  1263. */
  1264. #if !defined SPH_GROESTL_64
  1265. #if SPH_SMALL_FOOTPRINT_GROESTL && !SPH_64_TRUE
  1266. #define SPH_GROESTL_64 0
  1267. #else
  1268. #define SPH_GROESTL_64 1
  1269. #endif
  1270. #endif
  1271.  
  1272. /*
  1273. * The internal representation may use either big-endian or
  1274. * little-endian. Using the platform default representation speeds up
  1275. * encoding and decoding between bytes and the matrix columns.
  1276. */
  1277.  
  1278. #undef USE_LE
  1279. #if SPH_GROESTL_LITTLE_ENDIAN
  1280. #define USE_LE 1
  1281. #elif SPH_GROESTL_BIG_ENDIAN
  1282. #define USE_LE 0
  1283. #elif SPH_LITTLE_ENDIAN
  1284. #define USE_LE 1
  1285. #endif
  1286.  
  1287. #if USE_LE
  1288.  
  1289. #if SPH_64
  1290. #define C64e(x) ((SPH_C64(x) >> 56) \
  1291. | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
  1292. | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
  1293. | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
  1294. | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
  1295. | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
  1296. | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
  1297. | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
  1298. #define dec64e_aligned sph_dec64le_aligned
  1299. #define enc64e sph_enc64le
  1300. #define B64_0(x) as_uchar8(x).s0
  1301. #define B64_1(x) as_uchar8(x).s1
  1302. #define B64_2(x) as_uchar8(x).s2
  1303. #define B64_3(x) as_uchar8(x).s3
  1304. #define B64_4(x) as_uchar8(x).s4
  1305. #define B64_5(x) as_uchar8(x).s5
  1306. #define B64_6(x) as_uchar8(x).s6
  1307. #define B64_7(x) as_uchar8(x).s7
  1308. #define R64 SPH_ROTL64
  1309. #define PC64(j, r) ((sph_u64)((j) + (r)))
  1310. #define QC64(j, r) (((sph_u64)(r) << 56) ^ SPH_T64(~((sph_u64)(j) << 56)))
  1311. #endif
  1312.  
  1313. #else
  1314.  
  1315. #if SPH_64
  1316. #define C64e(x) SPH_C64(x)
  1317. #define dec64e_aligned sph_dec64be_aligned
  1318. #define enc64e sph_enc64be
  1319. #define B64_0(x) ((x) >> 56)
  1320. #define B64_1(x) (((x) >> 48) & 0xFF)
  1321. #define B64_2(x) (((x) >> 40) & 0xFF)
  1322. #define B64_3(x) (((x) >> 32) & 0xFF)
  1323. #define B64_4(x) (((x) >> 24) & 0xFF)
  1324. #define B64_5(x) (((x) >> 16) & 0xFF)
  1325. #define B64_6(x) (((x) >> 8) & 0xFF)
  1326. #define B64_7(x) ((x) & 0xFF)
  1327. #define R64 SPH_ROTR64
  1328. #define PC64(j, r) ((sph_u64)((j) + (r)) << 56)
  1329. #define QC64(j, r) ((sph_u64)(r) ^ SPH_T64(~(sph_u64)(j)))
  1330. #endif
  1331.  
  1332. #endif
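/*
 * How the macros above combine (a sketch of the usual Groestl-64 table
 * lookup, not copied from this kernel): one column of the next state is
 * produced by extracting one byte from each of eight state words with
 * B64_0..B64_7, using each byte to index one of the tables T0..T7 defined
 * below, and XORing the eight looked-up words; PC64 and QC64 supply the
 * round constants of the P and Q permutations before those lookups. The
 * names t, a, d and b0..b7 are illustrative only.
 */
#if 0
t[d] = T0_C[B64_0(a[b0])] ^ T1_C[B64_1(a[b1])]
     ^ T2_C[B64_2(a[b2])] ^ T3_C[B64_3(a[b3])]
     ^ T4_C[B64_4(a[b4])] ^ T5_C[B64_5(a[b5])]
     ^ T6_C[B64_6(a[b6])] ^ T7_C[B64_7(a[b7])];
#endif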
  1333.  
  1334. __constant const sph_u64 T0_C[] = {
  1335. C64e(0xc632f4a5f497a5c6), C64e(0xf86f978497eb84f8),
  1336. C64e(0xee5eb099b0c799ee), C64e(0xf67a8c8d8cf78df6),
  1337. C64e(0xffe8170d17e50dff), C64e(0xd60adcbddcb7bdd6),
  1338. C64e(0xde16c8b1c8a7b1de), C64e(0x916dfc54fc395491),
  1339. C64e(0x6090f050f0c05060), C64e(0x0207050305040302),
  1340. C64e(0xce2ee0a9e087a9ce), C64e(0x56d1877d87ac7d56),
  1341. C64e(0xe7cc2b192bd519e7), C64e(0xb513a662a67162b5),
  1342. C64e(0x4d7c31e6319ae64d), C64e(0xec59b59ab5c39aec),
  1343. C64e(0x8f40cf45cf05458f), C64e(0x1fa3bc9dbc3e9d1f),
  1344. C64e(0x8949c040c0094089), C64e(0xfa68928792ef87fa),
  1345. C64e(0xefd03f153fc515ef), C64e(0xb29426eb267febb2),
  1346. C64e(0x8ece40c94007c98e), C64e(0xfbe61d0b1ded0bfb),
  1347. C64e(0x416e2fec2f82ec41), C64e(0xb31aa967a97d67b3),
  1348. C64e(0x5f431cfd1cbefd5f), C64e(0x456025ea258aea45),
  1349. C64e(0x23f9dabfda46bf23), C64e(0x535102f702a6f753),
  1350. C64e(0xe445a196a1d396e4), C64e(0x9b76ed5bed2d5b9b),
  1351. C64e(0x75285dc25deac275), C64e(0xe1c5241c24d91ce1),
  1352. C64e(0x3dd4e9aee97aae3d), C64e(0x4cf2be6abe986a4c),
  1353. C64e(0x6c82ee5aeed85a6c), C64e(0x7ebdc341c3fc417e),
  1354. C64e(0xf5f3060206f102f5), C64e(0x8352d14fd11d4f83),
  1355. C64e(0x688ce45ce4d05c68), C64e(0x515607f407a2f451),
  1356. C64e(0xd18d5c345cb934d1), C64e(0xf9e1180818e908f9),
  1357. C64e(0xe24cae93aedf93e2), C64e(0xab3e9573954d73ab),
  1358. C64e(0x6297f553f5c45362), C64e(0x2a6b413f41543f2a),
  1359. C64e(0x081c140c14100c08), C64e(0x9563f652f6315295),
  1360. C64e(0x46e9af65af8c6546), C64e(0x9d7fe25ee2215e9d),
  1361. C64e(0x3048782878602830), C64e(0x37cff8a1f86ea137),
  1362. C64e(0x0a1b110f11140f0a), C64e(0x2febc4b5c45eb52f),
  1363. C64e(0x0e151b091b1c090e), C64e(0x247e5a365a483624),
  1364. C64e(0x1badb69bb6369b1b), C64e(0xdf98473d47a53ddf),
  1365. C64e(0xcda76a266a8126cd), C64e(0x4ef5bb69bb9c694e),
  1366. C64e(0x7f334ccd4cfecd7f), C64e(0xea50ba9fbacf9fea),
  1367. C64e(0x123f2d1b2d241b12), C64e(0x1da4b99eb93a9e1d),
  1368. C64e(0x58c49c749cb07458), C64e(0x3446722e72682e34),
  1369. C64e(0x3641772d776c2d36), C64e(0xdc11cdb2cda3b2dc),
  1370. C64e(0xb49d29ee2973eeb4), C64e(0x5b4d16fb16b6fb5b),
  1371. C64e(0xa4a501f60153f6a4), C64e(0x76a1d74dd7ec4d76),
  1372. C64e(0xb714a361a37561b7), C64e(0x7d3449ce49face7d),
  1373. C64e(0x52df8d7b8da47b52), C64e(0xdd9f423e42a13edd),
  1374. C64e(0x5ecd937193bc715e), C64e(0x13b1a297a2269713),
  1375. C64e(0xa6a204f50457f5a6), C64e(0xb901b868b86968b9),
  1376. C64e(0x0000000000000000), C64e(0xc1b5742c74992cc1),
  1377. C64e(0x40e0a060a0806040), C64e(0xe3c2211f21dd1fe3),
  1378. C64e(0x793a43c843f2c879), C64e(0xb69a2ced2c77edb6),
  1379. C64e(0xd40dd9bed9b3bed4), C64e(0x8d47ca46ca01468d),
  1380. C64e(0x671770d970ced967), C64e(0x72afdd4bdde44b72),
  1381. C64e(0x94ed79de7933de94), C64e(0x98ff67d4672bd498),
  1382. C64e(0xb09323e8237be8b0), C64e(0x855bde4ade114a85),
  1383. C64e(0xbb06bd6bbd6d6bbb), C64e(0xc5bb7e2a7e912ac5),
  1384. C64e(0x4f7b34e5349ee54f), C64e(0xedd73a163ac116ed),
  1385. C64e(0x86d254c55417c586), C64e(0x9af862d7622fd79a),
  1386. C64e(0x6699ff55ffcc5566), C64e(0x11b6a794a7229411),
  1387. C64e(0x8ac04acf4a0fcf8a), C64e(0xe9d9301030c910e9),
  1388. C64e(0x040e0a060a080604), C64e(0xfe66988198e781fe),
  1389. C64e(0xa0ab0bf00b5bf0a0), C64e(0x78b4cc44ccf04478),
  1390. C64e(0x25f0d5bad54aba25), C64e(0x4b753ee33e96e34b),
  1391. C64e(0xa2ac0ef30e5ff3a2), C64e(0x5d4419fe19bafe5d),
  1392. C64e(0x80db5bc05b1bc080), C64e(0x0580858a850a8a05),
  1393. C64e(0x3fd3ecadec7ead3f), C64e(0x21fedfbcdf42bc21),
  1394. C64e(0x70a8d848d8e04870), C64e(0xf1fd0c040cf904f1),
  1395. C64e(0x63197adf7ac6df63), C64e(0x772f58c158eec177),
  1396. C64e(0xaf309f759f4575af), C64e(0x42e7a563a5846342),
  1397. C64e(0x2070503050403020), C64e(0xe5cb2e1a2ed11ae5),
  1398. C64e(0xfdef120e12e10efd), C64e(0xbf08b76db7656dbf),
  1399. C64e(0x8155d44cd4194c81), C64e(0x18243c143c301418),
  1400. C64e(0x26795f355f4c3526), C64e(0xc3b2712f719d2fc3),
  1401. C64e(0xbe8638e13867e1be), C64e(0x35c8fda2fd6aa235),
  1402. C64e(0x88c74fcc4f0bcc88), C64e(0x2e654b394b5c392e),
  1403. C64e(0x936af957f93d5793), C64e(0x55580df20daaf255),
  1404. C64e(0xfc619d829de382fc), C64e(0x7ab3c947c9f4477a),
  1405. C64e(0xc827efacef8bacc8), C64e(0xba8832e7326fe7ba),
  1406. C64e(0x324f7d2b7d642b32), C64e(0xe642a495a4d795e6),
  1407. C64e(0xc03bfba0fb9ba0c0), C64e(0x19aab398b3329819),
  1408. C64e(0x9ef668d16827d19e), C64e(0xa322817f815d7fa3),
  1409. C64e(0x44eeaa66aa886644), C64e(0x54d6827e82a87e54),
  1410. C64e(0x3bdde6abe676ab3b), C64e(0x0b959e839e16830b),
  1411. C64e(0x8cc945ca4503ca8c), C64e(0xc7bc7b297b9529c7),
  1412. C64e(0x6b056ed36ed6d36b), C64e(0x286c443c44503c28),
  1413. C64e(0xa72c8b798b5579a7), C64e(0xbc813de23d63e2bc),
  1414. C64e(0x1631271d272c1d16), C64e(0xad379a769a4176ad),
  1415. C64e(0xdb964d3b4dad3bdb), C64e(0x649efa56fac85664),
  1416. C64e(0x74a6d24ed2e84e74), C64e(0x1436221e22281e14),
  1417. C64e(0x92e476db763fdb92), C64e(0x0c121e0a1e180a0c),
  1418. C64e(0x48fcb46cb4906c48), C64e(0xb88f37e4376be4b8),
  1419. C64e(0x9f78e75de7255d9f), C64e(0xbd0fb26eb2616ebd),
  1420. C64e(0x43692aef2a86ef43), C64e(0xc435f1a6f193a6c4),
  1421. C64e(0x39dae3a8e372a839), C64e(0x31c6f7a4f762a431),
  1422. C64e(0xd38a593759bd37d3), C64e(0xf274868b86ff8bf2),
  1423. C64e(0xd583563256b132d5), C64e(0x8b4ec543c50d438b),
  1424. C64e(0x6e85eb59ebdc596e), C64e(0xda18c2b7c2afb7da),
  1425. C64e(0x018e8f8c8f028c01), C64e(0xb11dac64ac7964b1),
  1426. C64e(0x9cf16dd26d23d29c), C64e(0x49723be03b92e049),
  1427. C64e(0xd81fc7b4c7abb4d8), C64e(0xacb915fa1543faac),
  1428. C64e(0xf3fa090709fd07f3), C64e(0xcfa06f256f8525cf),
  1429. C64e(0xca20eaafea8fafca), C64e(0xf47d898e89f38ef4),
  1430. C64e(0x476720e9208ee947), C64e(0x1038281828201810),
  1431. C64e(0x6f0b64d564ded56f), C64e(0xf073838883fb88f0),
  1432. C64e(0x4afbb16fb1946f4a), C64e(0x5cca967296b8725c),
  1433. C64e(0x38546c246c702438), C64e(0x575f08f108aef157),
  1434. C64e(0x732152c752e6c773), C64e(0x9764f351f3355197),
  1435. C64e(0xcbae6523658d23cb), C64e(0xa125847c84597ca1),
  1436. C64e(0xe857bf9cbfcb9ce8), C64e(0x3e5d6321637c213e),
  1437. C64e(0x96ea7cdd7c37dd96), C64e(0x611e7fdc7fc2dc61),
  1438. C64e(0x0d9c9186911a860d), C64e(0x0f9b9485941e850f),
  1439. C64e(0xe04bab90abdb90e0), C64e(0x7cbac642c6f8427c),
  1440. C64e(0x712657c457e2c471), C64e(0xcc29e5aae583aacc),
  1441. C64e(0x90e373d8733bd890), C64e(0x06090f050f0c0506),
  1442. C64e(0xf7f4030103f501f7), C64e(0x1c2a36123638121c),
  1443. C64e(0xc23cfea3fe9fa3c2), C64e(0x6a8be15fe1d45f6a),
  1444. C64e(0xaebe10f91047f9ae), C64e(0x69026bd06bd2d069),
  1445. C64e(0x17bfa891a82e9117), C64e(0x9971e858e8295899),
  1446. C64e(0x3a5369276974273a), C64e(0x27f7d0b9d04eb927),
  1447. C64e(0xd991483848a938d9), C64e(0xebde351335cd13eb),
  1448. C64e(0x2be5ceb3ce56b32b), C64e(0x2277553355443322),
  1449. C64e(0xd204d6bbd6bfbbd2), C64e(0xa9399070904970a9),
  1450. C64e(0x07878089800e8907), C64e(0x33c1f2a7f266a733),
  1451. C64e(0x2decc1b6c15ab62d), C64e(0x3c5a66226678223c),
  1452. C64e(0x15b8ad92ad2a9215), C64e(0xc9a96020608920c9),
  1453. C64e(0x875cdb49db154987), C64e(0xaab01aff1a4fffaa),
  1454. C64e(0x50d8887888a07850), C64e(0xa52b8e7a8e517aa5),
  1455. C64e(0x03898a8f8a068f03), C64e(0x594a13f813b2f859),
  1456. C64e(0x09929b809b128009), C64e(0x1a2339173934171a),
  1457. C64e(0x651075da75cada65), C64e(0xd784533153b531d7),
  1458. C64e(0x84d551c65113c684), C64e(0xd003d3b8d3bbb8d0),
  1459. C64e(0x82dc5ec35e1fc382), C64e(0x29e2cbb0cb52b029),
  1460. C64e(0x5ac3997799b4775a), C64e(0x1e2d3311333c111e),
  1461. C64e(0x7b3d46cb46f6cb7b), C64e(0xa8b71ffc1f4bfca8),
  1462. C64e(0x6d0c61d661dad66d), C64e(0x2c624e3a4e583a2c)
  1463. };
  1464.  
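/*
 * The seven tables that follow are byte-rotations of T0_C: for every index
 * i, Tn_C[i] == R64(T0_C[i], 8 * n). That is why T1_C..T3_C and T5_C..T7_C
 * are only emitted when SPH_SMALL_FOOTPRINT_GROESTL is 0, while T0_C and
 * T4_C are kept unconditionally; a small-footprint build can rebuild the
 * missing entries with R64 at lookup time, at the cost of extra rotate
 * instructions. Illustrative only (i is a hypothetical index):
 */
#if 0
sph_u64 t1_entry = R64(T0_C[i], 8);    /* equals T1_C[i] */
sph_u64 t4_entry = R64(T0_C[i], 32);   /* equals T4_C[i] */
#endif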
  1465. #if !SPH_SMALL_FOOTPRINT_GROESTL
  1466.  
  1467. __constant const sph_u64 T1_C[] = {
  1468. C64e(0xc6c632f4a5f497a5), C64e(0xf8f86f978497eb84),
  1469. C64e(0xeeee5eb099b0c799), C64e(0xf6f67a8c8d8cf78d),
  1470. C64e(0xffffe8170d17e50d), C64e(0xd6d60adcbddcb7bd),
  1471. C64e(0xdede16c8b1c8a7b1), C64e(0x91916dfc54fc3954),
  1472. C64e(0x606090f050f0c050), C64e(0x0202070503050403),
  1473. C64e(0xcece2ee0a9e087a9), C64e(0x5656d1877d87ac7d),
  1474. C64e(0xe7e7cc2b192bd519), C64e(0xb5b513a662a67162),
  1475. C64e(0x4d4d7c31e6319ae6), C64e(0xecec59b59ab5c39a),
  1476. C64e(0x8f8f40cf45cf0545), C64e(0x1f1fa3bc9dbc3e9d),
  1477. C64e(0x898949c040c00940), C64e(0xfafa68928792ef87),
  1478. C64e(0xefefd03f153fc515), C64e(0xb2b29426eb267feb),
  1479. C64e(0x8e8ece40c94007c9), C64e(0xfbfbe61d0b1ded0b),
  1480. C64e(0x41416e2fec2f82ec), C64e(0xb3b31aa967a97d67),
  1481. C64e(0x5f5f431cfd1cbefd), C64e(0x45456025ea258aea),
  1482. C64e(0x2323f9dabfda46bf), C64e(0x53535102f702a6f7),
  1483. C64e(0xe4e445a196a1d396), C64e(0x9b9b76ed5bed2d5b),
  1484. C64e(0x7575285dc25deac2), C64e(0xe1e1c5241c24d91c),
  1485. C64e(0x3d3dd4e9aee97aae), C64e(0x4c4cf2be6abe986a),
  1486. C64e(0x6c6c82ee5aeed85a), C64e(0x7e7ebdc341c3fc41),
  1487. C64e(0xf5f5f3060206f102), C64e(0x838352d14fd11d4f),
  1488. C64e(0x68688ce45ce4d05c), C64e(0x51515607f407a2f4),
  1489. C64e(0xd1d18d5c345cb934), C64e(0xf9f9e1180818e908),
  1490. C64e(0xe2e24cae93aedf93), C64e(0xabab3e9573954d73),
  1491. C64e(0x626297f553f5c453), C64e(0x2a2a6b413f41543f),
  1492. C64e(0x08081c140c14100c), C64e(0x959563f652f63152),
  1493. C64e(0x4646e9af65af8c65), C64e(0x9d9d7fe25ee2215e),
  1494. C64e(0x3030487828786028), C64e(0x3737cff8a1f86ea1),
  1495. C64e(0x0a0a1b110f11140f), C64e(0x2f2febc4b5c45eb5),
  1496. C64e(0x0e0e151b091b1c09), C64e(0x24247e5a365a4836),
  1497. C64e(0x1b1badb69bb6369b), C64e(0xdfdf98473d47a53d),
  1498. C64e(0xcdcda76a266a8126), C64e(0x4e4ef5bb69bb9c69),
  1499. C64e(0x7f7f334ccd4cfecd), C64e(0xeaea50ba9fbacf9f),
  1500. C64e(0x12123f2d1b2d241b), C64e(0x1d1da4b99eb93a9e),
  1501. C64e(0x5858c49c749cb074), C64e(0x343446722e72682e),
  1502. C64e(0x363641772d776c2d), C64e(0xdcdc11cdb2cda3b2),
  1503. C64e(0xb4b49d29ee2973ee), C64e(0x5b5b4d16fb16b6fb),
  1504. C64e(0xa4a4a501f60153f6), C64e(0x7676a1d74dd7ec4d),
  1505. C64e(0xb7b714a361a37561), C64e(0x7d7d3449ce49face),
  1506. C64e(0x5252df8d7b8da47b), C64e(0xdddd9f423e42a13e),
  1507. C64e(0x5e5ecd937193bc71), C64e(0x1313b1a297a22697),
  1508. C64e(0xa6a6a204f50457f5), C64e(0xb9b901b868b86968),
  1509. C64e(0x0000000000000000), C64e(0xc1c1b5742c74992c),
  1510. C64e(0x4040e0a060a08060), C64e(0xe3e3c2211f21dd1f),
  1511. C64e(0x79793a43c843f2c8), C64e(0xb6b69a2ced2c77ed),
  1512. C64e(0xd4d40dd9bed9b3be), C64e(0x8d8d47ca46ca0146),
  1513. C64e(0x67671770d970ced9), C64e(0x7272afdd4bdde44b),
  1514. C64e(0x9494ed79de7933de), C64e(0x9898ff67d4672bd4),
  1515. C64e(0xb0b09323e8237be8), C64e(0x85855bde4ade114a),
  1516. C64e(0xbbbb06bd6bbd6d6b), C64e(0xc5c5bb7e2a7e912a),
  1517. C64e(0x4f4f7b34e5349ee5), C64e(0xededd73a163ac116),
  1518. C64e(0x8686d254c55417c5), C64e(0x9a9af862d7622fd7),
  1519. C64e(0x666699ff55ffcc55), C64e(0x1111b6a794a72294),
  1520. C64e(0x8a8ac04acf4a0fcf), C64e(0xe9e9d9301030c910),
  1521. C64e(0x04040e0a060a0806), C64e(0xfefe66988198e781),
  1522. C64e(0xa0a0ab0bf00b5bf0), C64e(0x7878b4cc44ccf044),
  1523. C64e(0x2525f0d5bad54aba), C64e(0x4b4b753ee33e96e3),
  1524. C64e(0xa2a2ac0ef30e5ff3), C64e(0x5d5d4419fe19bafe),
  1525. C64e(0x8080db5bc05b1bc0), C64e(0x050580858a850a8a),
  1526. C64e(0x3f3fd3ecadec7ead), C64e(0x2121fedfbcdf42bc),
  1527. C64e(0x7070a8d848d8e048), C64e(0xf1f1fd0c040cf904),
  1528. C64e(0x6363197adf7ac6df), C64e(0x77772f58c158eec1),
  1529. C64e(0xafaf309f759f4575), C64e(0x4242e7a563a58463),
  1530. C64e(0x2020705030504030), C64e(0xe5e5cb2e1a2ed11a),
  1531. C64e(0xfdfdef120e12e10e), C64e(0xbfbf08b76db7656d),
  1532. C64e(0x818155d44cd4194c), C64e(0x1818243c143c3014),
  1533. C64e(0x2626795f355f4c35), C64e(0xc3c3b2712f719d2f),
  1534. C64e(0xbebe8638e13867e1), C64e(0x3535c8fda2fd6aa2),
  1535. C64e(0x8888c74fcc4f0bcc), C64e(0x2e2e654b394b5c39),
  1536. C64e(0x93936af957f93d57), C64e(0x5555580df20daaf2),
  1537. C64e(0xfcfc619d829de382), C64e(0x7a7ab3c947c9f447),
  1538. C64e(0xc8c827efacef8bac), C64e(0xbaba8832e7326fe7),
  1539. C64e(0x32324f7d2b7d642b), C64e(0xe6e642a495a4d795),
  1540. C64e(0xc0c03bfba0fb9ba0), C64e(0x1919aab398b33298),
  1541. C64e(0x9e9ef668d16827d1), C64e(0xa3a322817f815d7f),
  1542. C64e(0x4444eeaa66aa8866), C64e(0x5454d6827e82a87e),
  1543. C64e(0x3b3bdde6abe676ab), C64e(0x0b0b959e839e1683),
  1544. C64e(0x8c8cc945ca4503ca), C64e(0xc7c7bc7b297b9529),
  1545. C64e(0x6b6b056ed36ed6d3), C64e(0x28286c443c44503c),
  1546. C64e(0xa7a72c8b798b5579), C64e(0xbcbc813de23d63e2),
  1547. C64e(0x161631271d272c1d), C64e(0xadad379a769a4176),
  1548. C64e(0xdbdb964d3b4dad3b), C64e(0x64649efa56fac856),
  1549. C64e(0x7474a6d24ed2e84e), C64e(0x141436221e22281e),
  1550. C64e(0x9292e476db763fdb), C64e(0x0c0c121e0a1e180a),
  1551. C64e(0x4848fcb46cb4906c), C64e(0xb8b88f37e4376be4),
  1552. C64e(0x9f9f78e75de7255d), C64e(0xbdbd0fb26eb2616e),
  1553. C64e(0x4343692aef2a86ef), C64e(0xc4c435f1a6f193a6),
  1554. C64e(0x3939dae3a8e372a8), C64e(0x3131c6f7a4f762a4),
  1555. C64e(0xd3d38a593759bd37), C64e(0xf2f274868b86ff8b),
  1556. C64e(0xd5d583563256b132), C64e(0x8b8b4ec543c50d43),
  1557. C64e(0x6e6e85eb59ebdc59), C64e(0xdada18c2b7c2afb7),
  1558. C64e(0x01018e8f8c8f028c), C64e(0xb1b11dac64ac7964),
  1559. C64e(0x9c9cf16dd26d23d2), C64e(0x4949723be03b92e0),
  1560. C64e(0xd8d81fc7b4c7abb4), C64e(0xacacb915fa1543fa),
  1561. C64e(0xf3f3fa090709fd07), C64e(0xcfcfa06f256f8525),
  1562. C64e(0xcaca20eaafea8faf), C64e(0xf4f47d898e89f38e),
  1563. C64e(0x47476720e9208ee9), C64e(0x1010382818282018),
  1564. C64e(0x6f6f0b64d564ded5), C64e(0xf0f073838883fb88),
  1565. C64e(0x4a4afbb16fb1946f), C64e(0x5c5cca967296b872),
  1566. C64e(0x3838546c246c7024), C64e(0x57575f08f108aef1),
  1567. C64e(0x73732152c752e6c7), C64e(0x979764f351f33551),
  1568. C64e(0xcbcbae6523658d23), C64e(0xa1a125847c84597c),
  1569. C64e(0xe8e857bf9cbfcb9c), C64e(0x3e3e5d6321637c21),
  1570. C64e(0x9696ea7cdd7c37dd), C64e(0x61611e7fdc7fc2dc),
  1571. C64e(0x0d0d9c9186911a86), C64e(0x0f0f9b9485941e85),
  1572. C64e(0xe0e04bab90abdb90), C64e(0x7c7cbac642c6f842),
  1573. C64e(0x71712657c457e2c4), C64e(0xcccc29e5aae583aa),
  1574. C64e(0x9090e373d8733bd8), C64e(0x0606090f050f0c05),
  1575. C64e(0xf7f7f4030103f501), C64e(0x1c1c2a3612363812),
  1576. C64e(0xc2c23cfea3fe9fa3), C64e(0x6a6a8be15fe1d45f),
  1577. C64e(0xaeaebe10f91047f9), C64e(0x6969026bd06bd2d0),
  1578. C64e(0x1717bfa891a82e91), C64e(0x999971e858e82958),
  1579. C64e(0x3a3a536927697427), C64e(0x2727f7d0b9d04eb9),
  1580. C64e(0xd9d991483848a938), C64e(0xebebde351335cd13),
  1581. C64e(0x2b2be5ceb3ce56b3), C64e(0x2222775533554433),
  1582. C64e(0xd2d204d6bbd6bfbb), C64e(0xa9a9399070904970),
  1583. C64e(0x0707878089800e89), C64e(0x3333c1f2a7f266a7),
  1584. C64e(0x2d2decc1b6c15ab6), C64e(0x3c3c5a6622667822),
  1585. C64e(0x1515b8ad92ad2a92), C64e(0xc9c9a96020608920),
  1586. C64e(0x87875cdb49db1549), C64e(0xaaaab01aff1a4fff),
  1587. C64e(0x5050d8887888a078), C64e(0xa5a52b8e7a8e517a),
  1588. C64e(0x0303898a8f8a068f), C64e(0x59594a13f813b2f8),
  1589. C64e(0x0909929b809b1280), C64e(0x1a1a233917393417),
  1590. C64e(0x65651075da75cada), C64e(0xd7d784533153b531),
  1591. C64e(0x8484d551c65113c6), C64e(0xd0d003d3b8d3bbb8),
  1592. C64e(0x8282dc5ec35e1fc3), C64e(0x2929e2cbb0cb52b0),
  1593. C64e(0x5a5ac3997799b477), C64e(0x1e1e2d3311333c11),
  1594. C64e(0x7b7b3d46cb46f6cb), C64e(0xa8a8b71ffc1f4bfc),
  1595. C64e(0x6d6d0c61d661dad6), C64e(0x2c2c624e3a4e583a)
  1596. };
  1597.  
  1598. __constant const sph_u64 T2_C[] = {
  1599. C64e(0xa5c6c632f4a5f497), C64e(0x84f8f86f978497eb),
  1600. C64e(0x99eeee5eb099b0c7), C64e(0x8df6f67a8c8d8cf7),
  1601. C64e(0x0dffffe8170d17e5), C64e(0xbdd6d60adcbddcb7),
  1602. C64e(0xb1dede16c8b1c8a7), C64e(0x5491916dfc54fc39),
  1603. C64e(0x50606090f050f0c0), C64e(0x0302020705030504),
  1604. C64e(0xa9cece2ee0a9e087), C64e(0x7d5656d1877d87ac),
  1605. C64e(0x19e7e7cc2b192bd5), C64e(0x62b5b513a662a671),
  1606. C64e(0xe64d4d7c31e6319a), C64e(0x9aecec59b59ab5c3),
  1607. C64e(0x458f8f40cf45cf05), C64e(0x9d1f1fa3bc9dbc3e),
  1608. C64e(0x40898949c040c009), C64e(0x87fafa68928792ef),
  1609. C64e(0x15efefd03f153fc5), C64e(0xebb2b29426eb267f),
  1610. C64e(0xc98e8ece40c94007), C64e(0x0bfbfbe61d0b1ded),
  1611. C64e(0xec41416e2fec2f82), C64e(0x67b3b31aa967a97d),
  1612. C64e(0xfd5f5f431cfd1cbe), C64e(0xea45456025ea258a),
  1613. C64e(0xbf2323f9dabfda46), C64e(0xf753535102f702a6),
  1614. C64e(0x96e4e445a196a1d3), C64e(0x5b9b9b76ed5bed2d),
  1615. C64e(0xc27575285dc25dea), C64e(0x1ce1e1c5241c24d9),
  1616. C64e(0xae3d3dd4e9aee97a), C64e(0x6a4c4cf2be6abe98),
  1617. C64e(0x5a6c6c82ee5aeed8), C64e(0x417e7ebdc341c3fc),
  1618. C64e(0x02f5f5f3060206f1), C64e(0x4f838352d14fd11d),
  1619. C64e(0x5c68688ce45ce4d0), C64e(0xf451515607f407a2),
  1620. C64e(0x34d1d18d5c345cb9), C64e(0x08f9f9e1180818e9),
  1621. C64e(0x93e2e24cae93aedf), C64e(0x73abab3e9573954d),
  1622. C64e(0x53626297f553f5c4), C64e(0x3f2a2a6b413f4154),
  1623. C64e(0x0c08081c140c1410), C64e(0x52959563f652f631),
  1624. C64e(0x654646e9af65af8c), C64e(0x5e9d9d7fe25ee221),
  1625. C64e(0x2830304878287860), C64e(0xa13737cff8a1f86e),
  1626. C64e(0x0f0a0a1b110f1114), C64e(0xb52f2febc4b5c45e),
  1627. C64e(0x090e0e151b091b1c), C64e(0x3624247e5a365a48),
  1628. C64e(0x9b1b1badb69bb636), C64e(0x3ddfdf98473d47a5),
  1629. C64e(0x26cdcda76a266a81), C64e(0x694e4ef5bb69bb9c),
  1630. C64e(0xcd7f7f334ccd4cfe), C64e(0x9feaea50ba9fbacf),
  1631. C64e(0x1b12123f2d1b2d24), C64e(0x9e1d1da4b99eb93a),
  1632. C64e(0x745858c49c749cb0), C64e(0x2e343446722e7268),
  1633. C64e(0x2d363641772d776c), C64e(0xb2dcdc11cdb2cda3),
  1634. C64e(0xeeb4b49d29ee2973), C64e(0xfb5b5b4d16fb16b6),
  1635. C64e(0xf6a4a4a501f60153), C64e(0x4d7676a1d74dd7ec),
  1636. C64e(0x61b7b714a361a375), C64e(0xce7d7d3449ce49fa),
  1637. C64e(0x7b5252df8d7b8da4), C64e(0x3edddd9f423e42a1),
  1638. C64e(0x715e5ecd937193bc), C64e(0x971313b1a297a226),
  1639. C64e(0xf5a6a6a204f50457), C64e(0x68b9b901b868b869),
  1640. C64e(0x0000000000000000), C64e(0x2cc1c1b5742c7499),
  1641. C64e(0x604040e0a060a080), C64e(0x1fe3e3c2211f21dd),
  1642. C64e(0xc879793a43c843f2), C64e(0xedb6b69a2ced2c77),
  1643. C64e(0xbed4d40dd9bed9b3), C64e(0x468d8d47ca46ca01),
  1644. C64e(0xd967671770d970ce), C64e(0x4b7272afdd4bdde4),
  1645. C64e(0xde9494ed79de7933), C64e(0xd49898ff67d4672b),
  1646. C64e(0xe8b0b09323e8237b), C64e(0x4a85855bde4ade11),
  1647. C64e(0x6bbbbb06bd6bbd6d), C64e(0x2ac5c5bb7e2a7e91),
  1648. C64e(0xe54f4f7b34e5349e), C64e(0x16ededd73a163ac1),
  1649. C64e(0xc58686d254c55417), C64e(0xd79a9af862d7622f),
  1650. C64e(0x55666699ff55ffcc), C64e(0x941111b6a794a722),
  1651. C64e(0xcf8a8ac04acf4a0f), C64e(0x10e9e9d9301030c9),
  1652. C64e(0x0604040e0a060a08), C64e(0x81fefe66988198e7),
  1653. C64e(0xf0a0a0ab0bf00b5b), C64e(0x447878b4cc44ccf0),
  1654. C64e(0xba2525f0d5bad54a), C64e(0xe34b4b753ee33e96),
  1655. C64e(0xf3a2a2ac0ef30e5f), C64e(0xfe5d5d4419fe19ba),
  1656. C64e(0xc08080db5bc05b1b), C64e(0x8a050580858a850a),
  1657. C64e(0xad3f3fd3ecadec7e), C64e(0xbc2121fedfbcdf42),
  1658. C64e(0x487070a8d848d8e0), C64e(0x04f1f1fd0c040cf9),
  1659. C64e(0xdf6363197adf7ac6), C64e(0xc177772f58c158ee),
  1660. C64e(0x75afaf309f759f45), C64e(0x634242e7a563a584),
  1661. C64e(0x3020207050305040), C64e(0x1ae5e5cb2e1a2ed1),
  1662. C64e(0x0efdfdef120e12e1), C64e(0x6dbfbf08b76db765),
  1663. C64e(0x4c818155d44cd419), C64e(0x141818243c143c30),
  1664. C64e(0x352626795f355f4c), C64e(0x2fc3c3b2712f719d),
  1665. C64e(0xe1bebe8638e13867), C64e(0xa23535c8fda2fd6a),
  1666. C64e(0xcc8888c74fcc4f0b), C64e(0x392e2e654b394b5c),
  1667. C64e(0x5793936af957f93d), C64e(0xf25555580df20daa),
  1668. C64e(0x82fcfc619d829de3), C64e(0x477a7ab3c947c9f4),
  1669. C64e(0xacc8c827efacef8b), C64e(0xe7baba8832e7326f),
  1670. C64e(0x2b32324f7d2b7d64), C64e(0x95e6e642a495a4d7),
  1671. C64e(0xa0c0c03bfba0fb9b), C64e(0x981919aab398b332),
  1672. C64e(0xd19e9ef668d16827), C64e(0x7fa3a322817f815d),
  1673. C64e(0x664444eeaa66aa88), C64e(0x7e5454d6827e82a8),
  1674. C64e(0xab3b3bdde6abe676), C64e(0x830b0b959e839e16),
  1675. C64e(0xca8c8cc945ca4503), C64e(0x29c7c7bc7b297b95),
  1676. C64e(0xd36b6b056ed36ed6), C64e(0x3c28286c443c4450),
  1677. C64e(0x79a7a72c8b798b55), C64e(0xe2bcbc813de23d63),
  1678. C64e(0x1d161631271d272c), C64e(0x76adad379a769a41),
  1679. C64e(0x3bdbdb964d3b4dad), C64e(0x5664649efa56fac8),
  1680. C64e(0x4e7474a6d24ed2e8), C64e(0x1e141436221e2228),
  1681. C64e(0xdb9292e476db763f), C64e(0x0a0c0c121e0a1e18),
  1682. C64e(0x6c4848fcb46cb490), C64e(0xe4b8b88f37e4376b),
  1683. C64e(0x5d9f9f78e75de725), C64e(0x6ebdbd0fb26eb261),
  1684. C64e(0xef4343692aef2a86), C64e(0xa6c4c435f1a6f193),
  1685. C64e(0xa83939dae3a8e372), C64e(0xa43131c6f7a4f762),
  1686. C64e(0x37d3d38a593759bd), C64e(0x8bf2f274868b86ff),
  1687. C64e(0x32d5d583563256b1), C64e(0x438b8b4ec543c50d),
  1688. C64e(0x596e6e85eb59ebdc), C64e(0xb7dada18c2b7c2af),
  1689. C64e(0x8c01018e8f8c8f02), C64e(0x64b1b11dac64ac79),
  1690. C64e(0xd29c9cf16dd26d23), C64e(0xe04949723be03b92),
  1691. C64e(0xb4d8d81fc7b4c7ab), C64e(0xfaacacb915fa1543),
  1692. C64e(0x07f3f3fa090709fd), C64e(0x25cfcfa06f256f85),
  1693. C64e(0xafcaca20eaafea8f), C64e(0x8ef4f47d898e89f3),
  1694. C64e(0xe947476720e9208e), C64e(0x1810103828182820),
  1695. C64e(0xd56f6f0b64d564de), C64e(0x88f0f073838883fb),
  1696. C64e(0x6f4a4afbb16fb194), C64e(0x725c5cca967296b8),
  1697. C64e(0x243838546c246c70), C64e(0xf157575f08f108ae),
  1698. C64e(0xc773732152c752e6), C64e(0x51979764f351f335),
  1699. C64e(0x23cbcbae6523658d), C64e(0x7ca1a125847c8459),
  1700. C64e(0x9ce8e857bf9cbfcb), C64e(0x213e3e5d6321637c),
  1701. C64e(0xdd9696ea7cdd7c37), C64e(0xdc61611e7fdc7fc2),
  1702. C64e(0x860d0d9c9186911a), C64e(0x850f0f9b9485941e),
  1703. C64e(0x90e0e04bab90abdb), C64e(0x427c7cbac642c6f8),
  1704. C64e(0xc471712657c457e2), C64e(0xaacccc29e5aae583),
  1705. C64e(0xd89090e373d8733b), C64e(0x050606090f050f0c),
  1706. C64e(0x01f7f7f4030103f5), C64e(0x121c1c2a36123638),
  1707. C64e(0xa3c2c23cfea3fe9f), C64e(0x5f6a6a8be15fe1d4),
  1708. C64e(0xf9aeaebe10f91047), C64e(0xd06969026bd06bd2),
  1709. C64e(0x911717bfa891a82e), C64e(0x58999971e858e829),
  1710. C64e(0x273a3a5369276974), C64e(0xb92727f7d0b9d04e),
  1711. C64e(0x38d9d991483848a9), C64e(0x13ebebde351335cd),
  1712. C64e(0xb32b2be5ceb3ce56), C64e(0x3322227755335544),
  1713. C64e(0xbbd2d204d6bbd6bf), C64e(0x70a9a93990709049),
  1714. C64e(0x890707878089800e), C64e(0xa73333c1f2a7f266),
  1715. C64e(0xb62d2decc1b6c15a), C64e(0x223c3c5a66226678),
  1716. C64e(0x921515b8ad92ad2a), C64e(0x20c9c9a960206089),
  1717. C64e(0x4987875cdb49db15), C64e(0xffaaaab01aff1a4f),
  1718. C64e(0x785050d8887888a0), C64e(0x7aa5a52b8e7a8e51),
  1719. C64e(0x8f0303898a8f8a06), C64e(0xf859594a13f813b2),
  1720. C64e(0x800909929b809b12), C64e(0x171a1a2339173934),
  1721. C64e(0xda65651075da75ca), C64e(0x31d7d784533153b5),
  1722. C64e(0xc68484d551c65113), C64e(0xb8d0d003d3b8d3bb),
  1723. C64e(0xc38282dc5ec35e1f), C64e(0xb02929e2cbb0cb52),
  1724. C64e(0x775a5ac3997799b4), C64e(0x111e1e2d3311333c),
  1725. C64e(0xcb7b7b3d46cb46f6), C64e(0xfca8a8b71ffc1f4b),
  1726. C64e(0xd66d6d0c61d661da), C64e(0x3a2c2c624e3a4e58)
  1727. };
  1728.  
  1729. __constant const sph_u64 T3_C[] = {
  1730. C64e(0x97a5c6c632f4a5f4), C64e(0xeb84f8f86f978497),
  1731. C64e(0xc799eeee5eb099b0), C64e(0xf78df6f67a8c8d8c),
  1732. C64e(0xe50dffffe8170d17), C64e(0xb7bdd6d60adcbddc),
  1733. C64e(0xa7b1dede16c8b1c8), C64e(0x395491916dfc54fc),
  1734. C64e(0xc050606090f050f0), C64e(0x0403020207050305),
  1735. C64e(0x87a9cece2ee0a9e0), C64e(0xac7d5656d1877d87),
  1736. C64e(0xd519e7e7cc2b192b), C64e(0x7162b5b513a662a6),
  1737. C64e(0x9ae64d4d7c31e631), C64e(0xc39aecec59b59ab5),
  1738. C64e(0x05458f8f40cf45cf), C64e(0x3e9d1f1fa3bc9dbc),
  1739. C64e(0x0940898949c040c0), C64e(0xef87fafa68928792),
  1740. C64e(0xc515efefd03f153f), C64e(0x7febb2b29426eb26),
  1741. C64e(0x07c98e8ece40c940), C64e(0xed0bfbfbe61d0b1d),
  1742. C64e(0x82ec41416e2fec2f), C64e(0x7d67b3b31aa967a9),
  1743. C64e(0xbefd5f5f431cfd1c), C64e(0x8aea45456025ea25),
  1744. C64e(0x46bf2323f9dabfda), C64e(0xa6f753535102f702),
  1745. C64e(0xd396e4e445a196a1), C64e(0x2d5b9b9b76ed5bed),
  1746. C64e(0xeac27575285dc25d), C64e(0xd91ce1e1c5241c24),
  1747. C64e(0x7aae3d3dd4e9aee9), C64e(0x986a4c4cf2be6abe),
  1748. C64e(0xd85a6c6c82ee5aee), C64e(0xfc417e7ebdc341c3),
  1749. C64e(0xf102f5f5f3060206), C64e(0x1d4f838352d14fd1),
  1750. C64e(0xd05c68688ce45ce4), C64e(0xa2f451515607f407),
  1751. C64e(0xb934d1d18d5c345c), C64e(0xe908f9f9e1180818),
  1752. C64e(0xdf93e2e24cae93ae), C64e(0x4d73abab3e957395),
  1753. C64e(0xc453626297f553f5), C64e(0x543f2a2a6b413f41),
  1754. C64e(0x100c08081c140c14), C64e(0x3152959563f652f6),
  1755. C64e(0x8c654646e9af65af), C64e(0x215e9d9d7fe25ee2),
  1756. C64e(0x6028303048782878), C64e(0x6ea13737cff8a1f8),
  1757. C64e(0x140f0a0a1b110f11), C64e(0x5eb52f2febc4b5c4),
  1758. C64e(0x1c090e0e151b091b), C64e(0x483624247e5a365a),
  1759. C64e(0x369b1b1badb69bb6), C64e(0xa53ddfdf98473d47),
  1760. C64e(0x8126cdcda76a266a), C64e(0x9c694e4ef5bb69bb),
  1761. C64e(0xfecd7f7f334ccd4c), C64e(0xcf9feaea50ba9fba),
  1762. C64e(0x241b12123f2d1b2d), C64e(0x3a9e1d1da4b99eb9),
  1763. C64e(0xb0745858c49c749c), C64e(0x682e343446722e72),
  1764. C64e(0x6c2d363641772d77), C64e(0xa3b2dcdc11cdb2cd),
  1765. C64e(0x73eeb4b49d29ee29), C64e(0xb6fb5b5b4d16fb16),
  1766. C64e(0x53f6a4a4a501f601), C64e(0xec4d7676a1d74dd7),
  1767. C64e(0x7561b7b714a361a3), C64e(0xface7d7d3449ce49),
  1768. C64e(0xa47b5252df8d7b8d), C64e(0xa13edddd9f423e42),
  1769. C64e(0xbc715e5ecd937193), C64e(0x26971313b1a297a2),
  1770. C64e(0x57f5a6a6a204f504), C64e(0x6968b9b901b868b8),
  1771. C64e(0x0000000000000000), C64e(0x992cc1c1b5742c74),
  1772. C64e(0x80604040e0a060a0), C64e(0xdd1fe3e3c2211f21),
  1773. C64e(0xf2c879793a43c843), C64e(0x77edb6b69a2ced2c),
  1774. C64e(0xb3bed4d40dd9bed9), C64e(0x01468d8d47ca46ca),
  1775. C64e(0xced967671770d970), C64e(0xe44b7272afdd4bdd),
  1776. C64e(0x33de9494ed79de79), C64e(0x2bd49898ff67d467),
  1777. C64e(0x7be8b0b09323e823), C64e(0x114a85855bde4ade),
  1778. C64e(0x6d6bbbbb06bd6bbd), C64e(0x912ac5c5bb7e2a7e),
  1779. C64e(0x9ee54f4f7b34e534), C64e(0xc116ededd73a163a),
  1780. C64e(0x17c58686d254c554), C64e(0x2fd79a9af862d762),
  1781. C64e(0xcc55666699ff55ff), C64e(0x22941111b6a794a7),
  1782. C64e(0x0fcf8a8ac04acf4a), C64e(0xc910e9e9d9301030),
  1783. C64e(0x080604040e0a060a), C64e(0xe781fefe66988198),
  1784. C64e(0x5bf0a0a0ab0bf00b), C64e(0xf0447878b4cc44cc),
  1785. C64e(0x4aba2525f0d5bad5), C64e(0x96e34b4b753ee33e),
  1786. C64e(0x5ff3a2a2ac0ef30e), C64e(0xbafe5d5d4419fe19),
  1787. C64e(0x1bc08080db5bc05b), C64e(0x0a8a050580858a85),
  1788. C64e(0x7ead3f3fd3ecadec), C64e(0x42bc2121fedfbcdf),
  1789. C64e(0xe0487070a8d848d8), C64e(0xf904f1f1fd0c040c),
  1790. C64e(0xc6df6363197adf7a), C64e(0xeec177772f58c158),
  1791. C64e(0x4575afaf309f759f), C64e(0x84634242e7a563a5),
  1792. C64e(0x4030202070503050), C64e(0xd11ae5e5cb2e1a2e),
  1793. C64e(0xe10efdfdef120e12), C64e(0x656dbfbf08b76db7),
  1794. C64e(0x194c818155d44cd4), C64e(0x30141818243c143c),
  1795. C64e(0x4c352626795f355f), C64e(0x9d2fc3c3b2712f71),
  1796. C64e(0x67e1bebe8638e138), C64e(0x6aa23535c8fda2fd),
  1797. C64e(0x0bcc8888c74fcc4f), C64e(0x5c392e2e654b394b),
  1798. C64e(0x3d5793936af957f9), C64e(0xaaf25555580df20d),
  1799. C64e(0xe382fcfc619d829d), C64e(0xf4477a7ab3c947c9),
  1800. C64e(0x8bacc8c827efacef), C64e(0x6fe7baba8832e732),
  1801. C64e(0x642b32324f7d2b7d), C64e(0xd795e6e642a495a4),
  1802. C64e(0x9ba0c0c03bfba0fb), C64e(0x32981919aab398b3),
  1803. C64e(0x27d19e9ef668d168), C64e(0x5d7fa3a322817f81),
  1804. C64e(0x88664444eeaa66aa), C64e(0xa87e5454d6827e82),
  1805. C64e(0x76ab3b3bdde6abe6), C64e(0x16830b0b959e839e),
  1806. C64e(0x03ca8c8cc945ca45), C64e(0x9529c7c7bc7b297b),
  1807. C64e(0xd6d36b6b056ed36e), C64e(0x503c28286c443c44),
  1808. C64e(0x5579a7a72c8b798b), C64e(0x63e2bcbc813de23d),
  1809. C64e(0x2c1d161631271d27), C64e(0x4176adad379a769a),
  1810. C64e(0xad3bdbdb964d3b4d), C64e(0xc85664649efa56fa),
  1811. C64e(0xe84e7474a6d24ed2), C64e(0x281e141436221e22),
  1812. C64e(0x3fdb9292e476db76), C64e(0x180a0c0c121e0a1e),
  1813. C64e(0x906c4848fcb46cb4), C64e(0x6be4b8b88f37e437),
  1814. C64e(0x255d9f9f78e75de7), C64e(0x616ebdbd0fb26eb2),
  1815. C64e(0x86ef4343692aef2a), C64e(0x93a6c4c435f1a6f1),
  1816. C64e(0x72a83939dae3a8e3), C64e(0x62a43131c6f7a4f7),
  1817. C64e(0xbd37d3d38a593759), C64e(0xff8bf2f274868b86),
  1818. C64e(0xb132d5d583563256), C64e(0x0d438b8b4ec543c5),
  1819. C64e(0xdc596e6e85eb59eb), C64e(0xafb7dada18c2b7c2),
  1820. C64e(0x028c01018e8f8c8f), C64e(0x7964b1b11dac64ac),
  1821. C64e(0x23d29c9cf16dd26d), C64e(0x92e04949723be03b),
  1822. C64e(0xabb4d8d81fc7b4c7), C64e(0x43faacacb915fa15),
  1823. C64e(0xfd07f3f3fa090709), C64e(0x8525cfcfa06f256f),
  1824. C64e(0x8fafcaca20eaafea), C64e(0xf38ef4f47d898e89),
  1825. C64e(0x8ee947476720e920), C64e(0x2018101038281828),
  1826. C64e(0xded56f6f0b64d564), C64e(0xfb88f0f073838883),
  1827. C64e(0x946f4a4afbb16fb1), C64e(0xb8725c5cca967296),
  1828. C64e(0x70243838546c246c), C64e(0xaef157575f08f108),
  1829. C64e(0xe6c773732152c752), C64e(0x3551979764f351f3),
  1830. C64e(0x8d23cbcbae652365), C64e(0x597ca1a125847c84),
  1831. C64e(0xcb9ce8e857bf9cbf), C64e(0x7c213e3e5d632163),
  1832. C64e(0x37dd9696ea7cdd7c), C64e(0xc2dc61611e7fdc7f),
  1833. C64e(0x1a860d0d9c918691), C64e(0x1e850f0f9b948594),
  1834. C64e(0xdb90e0e04bab90ab), C64e(0xf8427c7cbac642c6),
  1835. C64e(0xe2c471712657c457), C64e(0x83aacccc29e5aae5),
  1836. C64e(0x3bd89090e373d873), C64e(0x0c050606090f050f),
  1837. C64e(0xf501f7f7f4030103), C64e(0x38121c1c2a361236),
  1838. C64e(0x9fa3c2c23cfea3fe), C64e(0xd45f6a6a8be15fe1),
  1839. C64e(0x47f9aeaebe10f910), C64e(0xd2d06969026bd06b),
  1840. C64e(0x2e911717bfa891a8), C64e(0x2958999971e858e8),
  1841. C64e(0x74273a3a53692769), C64e(0x4eb92727f7d0b9d0),
  1842. C64e(0xa938d9d991483848), C64e(0xcd13ebebde351335),
  1843. C64e(0x56b32b2be5ceb3ce), C64e(0x4433222277553355),
  1844. C64e(0xbfbbd2d204d6bbd6), C64e(0x4970a9a939907090),
  1845. C64e(0x0e89070787808980), C64e(0x66a73333c1f2a7f2),
  1846. C64e(0x5ab62d2decc1b6c1), C64e(0x78223c3c5a662266),
  1847. C64e(0x2a921515b8ad92ad), C64e(0x8920c9c9a9602060),
  1848. C64e(0x154987875cdb49db), C64e(0x4fffaaaab01aff1a),
  1849. C64e(0xa0785050d8887888), C64e(0x517aa5a52b8e7a8e),
  1850. C64e(0x068f0303898a8f8a), C64e(0xb2f859594a13f813),
  1851. C64e(0x12800909929b809b), C64e(0x34171a1a23391739),
  1852. C64e(0xcada65651075da75), C64e(0xb531d7d784533153),
  1853. C64e(0x13c68484d551c651), C64e(0xbbb8d0d003d3b8d3),
  1854. C64e(0x1fc38282dc5ec35e), C64e(0x52b02929e2cbb0cb),
  1855. C64e(0xb4775a5ac3997799), C64e(0x3c111e1e2d331133),
  1856. C64e(0xf6cb7b7b3d46cb46), C64e(0x4bfca8a8b71ffc1f),
  1857. C64e(0xdad66d6d0c61d661), C64e(0x583a2c2c624e3a4e)
  1858. };
  1859.  
  1860. #endif
  1861.  
  1862. __constant const sph_u64 T4_C[] = {
  1863. C64e(0xf497a5c6c632f4a5), C64e(0x97eb84f8f86f9784),
  1864. C64e(0xb0c799eeee5eb099), C64e(0x8cf78df6f67a8c8d),
  1865. C64e(0x17e50dffffe8170d), C64e(0xdcb7bdd6d60adcbd),
  1866. C64e(0xc8a7b1dede16c8b1), C64e(0xfc395491916dfc54),
  1867. C64e(0xf0c050606090f050), C64e(0x0504030202070503),
  1868. C64e(0xe087a9cece2ee0a9), C64e(0x87ac7d5656d1877d),
  1869. C64e(0x2bd519e7e7cc2b19), C64e(0xa67162b5b513a662),
  1870. C64e(0x319ae64d4d7c31e6), C64e(0xb5c39aecec59b59a),
  1871. C64e(0xcf05458f8f40cf45), C64e(0xbc3e9d1f1fa3bc9d),
  1872. C64e(0xc00940898949c040), C64e(0x92ef87fafa689287),
  1873. C64e(0x3fc515efefd03f15), C64e(0x267febb2b29426eb),
  1874. C64e(0x4007c98e8ece40c9), C64e(0x1ded0bfbfbe61d0b),
  1875. C64e(0x2f82ec41416e2fec), C64e(0xa97d67b3b31aa967),
  1876. C64e(0x1cbefd5f5f431cfd), C64e(0x258aea45456025ea),
  1877. C64e(0xda46bf2323f9dabf), C64e(0x02a6f753535102f7),
  1878. C64e(0xa1d396e4e445a196), C64e(0xed2d5b9b9b76ed5b),
  1879. C64e(0x5deac27575285dc2), C64e(0x24d91ce1e1c5241c),
  1880. C64e(0xe97aae3d3dd4e9ae), C64e(0xbe986a4c4cf2be6a),
  1881. C64e(0xeed85a6c6c82ee5a), C64e(0xc3fc417e7ebdc341),
  1882. C64e(0x06f102f5f5f30602), C64e(0xd11d4f838352d14f),
  1883. C64e(0xe4d05c68688ce45c), C64e(0x07a2f451515607f4),
  1884. C64e(0x5cb934d1d18d5c34), C64e(0x18e908f9f9e11808),
  1885. C64e(0xaedf93e2e24cae93), C64e(0x954d73abab3e9573),
  1886. C64e(0xf5c453626297f553), C64e(0x41543f2a2a6b413f),
  1887. C64e(0x14100c08081c140c), C64e(0xf63152959563f652),
  1888. C64e(0xaf8c654646e9af65), C64e(0xe2215e9d9d7fe25e),
  1889. C64e(0x7860283030487828), C64e(0xf86ea13737cff8a1),
  1890. C64e(0x11140f0a0a1b110f), C64e(0xc45eb52f2febc4b5),
  1891. C64e(0x1b1c090e0e151b09), C64e(0x5a483624247e5a36),
  1892. C64e(0xb6369b1b1badb69b), C64e(0x47a53ddfdf98473d),
  1893. C64e(0x6a8126cdcda76a26), C64e(0xbb9c694e4ef5bb69),
  1894. C64e(0x4cfecd7f7f334ccd), C64e(0xbacf9feaea50ba9f),
  1895. C64e(0x2d241b12123f2d1b), C64e(0xb93a9e1d1da4b99e),
  1896. C64e(0x9cb0745858c49c74), C64e(0x72682e343446722e),
  1897. C64e(0x776c2d363641772d), C64e(0xcda3b2dcdc11cdb2),
  1898. C64e(0x2973eeb4b49d29ee), C64e(0x16b6fb5b5b4d16fb),
  1899. C64e(0x0153f6a4a4a501f6), C64e(0xd7ec4d7676a1d74d),
  1900. C64e(0xa37561b7b714a361), C64e(0x49face7d7d3449ce),
  1901. C64e(0x8da47b5252df8d7b), C64e(0x42a13edddd9f423e),
  1902. C64e(0x93bc715e5ecd9371), C64e(0xa226971313b1a297),
  1903. C64e(0x0457f5a6a6a204f5), C64e(0xb86968b9b901b868),
  1904. C64e(0x0000000000000000), C64e(0x74992cc1c1b5742c),
  1905. C64e(0xa080604040e0a060), C64e(0x21dd1fe3e3c2211f),
  1906. C64e(0x43f2c879793a43c8), C64e(0x2c77edb6b69a2ced),
  1907. C64e(0xd9b3bed4d40dd9be), C64e(0xca01468d8d47ca46),
  1908. C64e(0x70ced967671770d9), C64e(0xdde44b7272afdd4b),
  1909. C64e(0x7933de9494ed79de), C64e(0x672bd49898ff67d4),
  1910. C64e(0x237be8b0b09323e8), C64e(0xde114a85855bde4a),
  1911. C64e(0xbd6d6bbbbb06bd6b), C64e(0x7e912ac5c5bb7e2a),
  1912. C64e(0x349ee54f4f7b34e5), C64e(0x3ac116ededd73a16),
  1913. C64e(0x5417c58686d254c5), C64e(0x622fd79a9af862d7),
  1914. C64e(0xffcc55666699ff55), C64e(0xa722941111b6a794),
  1915. C64e(0x4a0fcf8a8ac04acf), C64e(0x30c910e9e9d93010),
  1916. C64e(0x0a080604040e0a06), C64e(0x98e781fefe669881),
  1917. C64e(0x0b5bf0a0a0ab0bf0), C64e(0xccf0447878b4cc44),
  1918. C64e(0xd54aba2525f0d5ba), C64e(0x3e96e34b4b753ee3),
  1919. C64e(0x0e5ff3a2a2ac0ef3), C64e(0x19bafe5d5d4419fe),
  1920. C64e(0x5b1bc08080db5bc0), C64e(0x850a8a050580858a),
  1921. C64e(0xec7ead3f3fd3ecad), C64e(0xdf42bc2121fedfbc),
  1922. C64e(0xd8e0487070a8d848), C64e(0x0cf904f1f1fd0c04),
  1923. C64e(0x7ac6df6363197adf), C64e(0x58eec177772f58c1),
  1924. C64e(0x9f4575afaf309f75), C64e(0xa584634242e7a563),
  1925. C64e(0x5040302020705030), C64e(0x2ed11ae5e5cb2e1a),
  1926. C64e(0x12e10efdfdef120e), C64e(0xb7656dbfbf08b76d),
  1927. C64e(0xd4194c818155d44c), C64e(0x3c30141818243c14),
  1928. C64e(0x5f4c352626795f35), C64e(0x719d2fc3c3b2712f),
  1929. C64e(0x3867e1bebe8638e1), C64e(0xfd6aa23535c8fda2),
  1930. C64e(0x4f0bcc8888c74fcc), C64e(0x4b5c392e2e654b39),
  1931. C64e(0xf93d5793936af957), C64e(0x0daaf25555580df2),
  1932. C64e(0x9de382fcfc619d82), C64e(0xc9f4477a7ab3c947),
  1933. C64e(0xef8bacc8c827efac), C64e(0x326fe7baba8832e7),
  1934. C64e(0x7d642b32324f7d2b), C64e(0xa4d795e6e642a495),
  1935. C64e(0xfb9ba0c0c03bfba0), C64e(0xb332981919aab398),
  1936. C64e(0x6827d19e9ef668d1), C64e(0x815d7fa3a322817f),
  1937. C64e(0xaa88664444eeaa66), C64e(0x82a87e5454d6827e),
  1938. C64e(0xe676ab3b3bdde6ab), C64e(0x9e16830b0b959e83),
  1939. C64e(0x4503ca8c8cc945ca), C64e(0x7b9529c7c7bc7b29),
  1940. C64e(0x6ed6d36b6b056ed3), C64e(0x44503c28286c443c),
  1941. C64e(0x8b5579a7a72c8b79), C64e(0x3d63e2bcbc813de2),
  1942. C64e(0x272c1d161631271d), C64e(0x9a4176adad379a76),
  1943. C64e(0x4dad3bdbdb964d3b), C64e(0xfac85664649efa56),
  1944. C64e(0xd2e84e7474a6d24e), C64e(0x22281e141436221e),
  1945. C64e(0x763fdb9292e476db), C64e(0x1e180a0c0c121e0a),
  1946. C64e(0xb4906c4848fcb46c), C64e(0x376be4b8b88f37e4),
  1947. C64e(0xe7255d9f9f78e75d), C64e(0xb2616ebdbd0fb26e),
  1948. C64e(0x2a86ef4343692aef), C64e(0xf193a6c4c435f1a6),
  1949. C64e(0xe372a83939dae3a8), C64e(0xf762a43131c6f7a4),
  1950. C64e(0x59bd37d3d38a5937), C64e(0x86ff8bf2f274868b),
  1951. C64e(0x56b132d5d5835632), C64e(0xc50d438b8b4ec543),
  1952. C64e(0xebdc596e6e85eb59), C64e(0xc2afb7dada18c2b7),
  1953. C64e(0x8f028c01018e8f8c), C64e(0xac7964b1b11dac64),
  1954. C64e(0x6d23d29c9cf16dd2), C64e(0x3b92e04949723be0),
  1955. C64e(0xc7abb4d8d81fc7b4), C64e(0x1543faacacb915fa),
  1956. C64e(0x09fd07f3f3fa0907), C64e(0x6f8525cfcfa06f25),
  1957. C64e(0xea8fafcaca20eaaf), C64e(0x89f38ef4f47d898e),
  1958. C64e(0x208ee947476720e9), C64e(0x2820181010382818),
  1959. C64e(0x64ded56f6f0b64d5), C64e(0x83fb88f0f0738388),
  1960. C64e(0xb1946f4a4afbb16f), C64e(0x96b8725c5cca9672),
  1961. C64e(0x6c70243838546c24), C64e(0x08aef157575f08f1),
  1962. C64e(0x52e6c773732152c7), C64e(0xf33551979764f351),
  1963. C64e(0x658d23cbcbae6523), C64e(0x84597ca1a125847c),
  1964. C64e(0xbfcb9ce8e857bf9c), C64e(0x637c213e3e5d6321),
  1965. C64e(0x7c37dd9696ea7cdd), C64e(0x7fc2dc61611e7fdc),
  1966. C64e(0x911a860d0d9c9186), C64e(0x941e850f0f9b9485),
  1967. C64e(0xabdb90e0e04bab90), C64e(0xc6f8427c7cbac642),
  1968. C64e(0x57e2c471712657c4), C64e(0xe583aacccc29e5aa),
  1969. C64e(0x733bd89090e373d8), C64e(0x0f0c050606090f05),
  1970. C64e(0x03f501f7f7f40301), C64e(0x3638121c1c2a3612),
  1971. C64e(0xfe9fa3c2c23cfea3), C64e(0xe1d45f6a6a8be15f),
  1972. C64e(0x1047f9aeaebe10f9), C64e(0x6bd2d06969026bd0),
  1973. C64e(0xa82e911717bfa891), C64e(0xe82958999971e858),
  1974. C64e(0x6974273a3a536927), C64e(0xd04eb92727f7d0b9),
  1975. C64e(0x48a938d9d9914838), C64e(0x35cd13ebebde3513),
  1976. C64e(0xce56b32b2be5ceb3), C64e(0x5544332222775533),
  1977. C64e(0xd6bfbbd2d204d6bb), C64e(0x904970a9a9399070),
  1978. C64e(0x800e890707878089), C64e(0xf266a73333c1f2a7),
  1979. C64e(0xc15ab62d2decc1b6), C64e(0x6678223c3c5a6622),
  1980. C64e(0xad2a921515b8ad92), C64e(0x608920c9c9a96020),
  1981. C64e(0xdb154987875cdb49), C64e(0x1a4fffaaaab01aff),
  1982. C64e(0x88a0785050d88878), C64e(0x8e517aa5a52b8e7a),
  1983. C64e(0x8a068f0303898a8f), C64e(0x13b2f859594a13f8),
  1984. C64e(0x9b12800909929b80), C64e(0x3934171a1a233917),
  1985. C64e(0x75cada65651075da), C64e(0x53b531d7d7845331),
  1986. C64e(0x5113c68484d551c6), C64e(0xd3bbb8d0d003d3b8),
  1987. C64e(0x5e1fc38282dc5ec3), C64e(0xcb52b02929e2cbb0),
  1988. C64e(0x99b4775a5ac39977), C64e(0x333c111e1e2d3311),
  1989. C64e(0x46f6cb7b7b3d46cb), C64e(0x1f4bfca8a8b71ffc),
  1990. C64e(0x61dad66d6d0c61d6), C64e(0x4e583a2c2c624e3a)
  1991. };
  1992.  
  1993. #if !SPH_SMALL_FOOTPRINT_GROESTL
  1994.  
  1995. __constant const sph_u64 T5_C[] = {
  1996. C64e(0xa5f497a5c6c632f4), C64e(0x8497eb84f8f86f97),
  1997. C64e(0x99b0c799eeee5eb0), C64e(0x8d8cf78df6f67a8c),
  1998. C64e(0x0d17e50dffffe817), C64e(0xbddcb7bdd6d60adc),
  1999. C64e(0xb1c8a7b1dede16c8), C64e(0x54fc395491916dfc),
  2000. C64e(0x50f0c050606090f0), C64e(0x0305040302020705),
  2001. C64e(0xa9e087a9cece2ee0), C64e(0x7d87ac7d5656d187),
  2002. C64e(0x192bd519e7e7cc2b), C64e(0x62a67162b5b513a6),
  2003. C64e(0xe6319ae64d4d7c31), C64e(0x9ab5c39aecec59b5),
  2004. C64e(0x45cf05458f8f40cf), C64e(0x9dbc3e9d1f1fa3bc),
  2005. C64e(0x40c00940898949c0), C64e(0x8792ef87fafa6892),
  2006. C64e(0x153fc515efefd03f), C64e(0xeb267febb2b29426),
  2007. C64e(0xc94007c98e8ece40), C64e(0x0b1ded0bfbfbe61d),
  2008. C64e(0xec2f82ec41416e2f), C64e(0x67a97d67b3b31aa9),
  2009. C64e(0xfd1cbefd5f5f431c), C64e(0xea258aea45456025),
  2010. C64e(0xbfda46bf2323f9da), C64e(0xf702a6f753535102),
  2011. C64e(0x96a1d396e4e445a1), C64e(0x5bed2d5b9b9b76ed),
  2012. C64e(0xc25deac27575285d), C64e(0x1c24d91ce1e1c524),
  2013. C64e(0xaee97aae3d3dd4e9), C64e(0x6abe986a4c4cf2be),
  2014. C64e(0x5aeed85a6c6c82ee), C64e(0x41c3fc417e7ebdc3),
  2015. C64e(0x0206f102f5f5f306), C64e(0x4fd11d4f838352d1),
  2016. C64e(0x5ce4d05c68688ce4), C64e(0xf407a2f451515607),
  2017. C64e(0x345cb934d1d18d5c), C64e(0x0818e908f9f9e118),
  2018. C64e(0x93aedf93e2e24cae), C64e(0x73954d73abab3e95),
  2019. C64e(0x53f5c453626297f5), C64e(0x3f41543f2a2a6b41),
  2020. C64e(0x0c14100c08081c14), C64e(0x52f63152959563f6),
  2021. C64e(0x65af8c654646e9af), C64e(0x5ee2215e9d9d7fe2),
  2022. C64e(0x2878602830304878), C64e(0xa1f86ea13737cff8),
  2023. C64e(0x0f11140f0a0a1b11), C64e(0xb5c45eb52f2febc4),
  2024. C64e(0x091b1c090e0e151b), C64e(0x365a483624247e5a),
  2025. C64e(0x9bb6369b1b1badb6), C64e(0x3d47a53ddfdf9847),
  2026. C64e(0x266a8126cdcda76a), C64e(0x69bb9c694e4ef5bb),
  2027. C64e(0xcd4cfecd7f7f334c), C64e(0x9fbacf9feaea50ba),
  2028. C64e(0x1b2d241b12123f2d), C64e(0x9eb93a9e1d1da4b9),
  2029. C64e(0x749cb0745858c49c), C64e(0x2e72682e34344672),
  2030. C64e(0x2d776c2d36364177), C64e(0xb2cda3b2dcdc11cd),
  2031. C64e(0xee2973eeb4b49d29), C64e(0xfb16b6fb5b5b4d16),
  2032. C64e(0xf60153f6a4a4a501), C64e(0x4dd7ec4d7676a1d7),
  2033. C64e(0x61a37561b7b714a3), C64e(0xce49face7d7d3449),
  2034. C64e(0x7b8da47b5252df8d), C64e(0x3e42a13edddd9f42),
  2035. C64e(0x7193bc715e5ecd93), C64e(0x97a226971313b1a2),
  2036. C64e(0xf50457f5a6a6a204), C64e(0x68b86968b9b901b8),
  2037. C64e(0x0000000000000000), C64e(0x2c74992cc1c1b574),
  2038. C64e(0x60a080604040e0a0), C64e(0x1f21dd1fe3e3c221),
  2039. C64e(0xc843f2c879793a43), C64e(0xed2c77edb6b69a2c),
  2040. C64e(0xbed9b3bed4d40dd9), C64e(0x46ca01468d8d47ca),
  2041. C64e(0xd970ced967671770), C64e(0x4bdde44b7272afdd),
  2042. C64e(0xde7933de9494ed79), C64e(0xd4672bd49898ff67),
  2043. C64e(0xe8237be8b0b09323), C64e(0x4ade114a85855bde),
  2044. C64e(0x6bbd6d6bbbbb06bd), C64e(0x2a7e912ac5c5bb7e),
  2045. C64e(0xe5349ee54f4f7b34), C64e(0x163ac116ededd73a),
  2046. C64e(0xc55417c58686d254), C64e(0xd7622fd79a9af862),
  2047. C64e(0x55ffcc55666699ff), C64e(0x94a722941111b6a7),
  2048. C64e(0xcf4a0fcf8a8ac04a), C64e(0x1030c910e9e9d930),
  2049. C64e(0x060a080604040e0a), C64e(0x8198e781fefe6698),
  2050. C64e(0xf00b5bf0a0a0ab0b), C64e(0x44ccf0447878b4cc),
  2051. C64e(0xbad54aba2525f0d5), C64e(0xe33e96e34b4b753e),
  2052. C64e(0xf30e5ff3a2a2ac0e), C64e(0xfe19bafe5d5d4419),
  2053. C64e(0xc05b1bc08080db5b), C64e(0x8a850a8a05058085),
  2054. C64e(0xadec7ead3f3fd3ec), C64e(0xbcdf42bc2121fedf),
  2055. C64e(0x48d8e0487070a8d8), C64e(0x040cf904f1f1fd0c),
  2056. C64e(0xdf7ac6df6363197a), C64e(0xc158eec177772f58),
  2057. C64e(0x759f4575afaf309f), C64e(0x63a584634242e7a5),
  2058. C64e(0x3050403020207050), C64e(0x1a2ed11ae5e5cb2e),
  2059. C64e(0x0e12e10efdfdef12), C64e(0x6db7656dbfbf08b7),
  2060. C64e(0x4cd4194c818155d4), C64e(0x143c30141818243c),
  2061. C64e(0x355f4c352626795f), C64e(0x2f719d2fc3c3b271),
  2062. C64e(0xe13867e1bebe8638), C64e(0xa2fd6aa23535c8fd),
  2063. C64e(0xcc4f0bcc8888c74f), C64e(0x394b5c392e2e654b),
  2064. C64e(0x57f93d5793936af9), C64e(0xf20daaf25555580d),
  2065. C64e(0x829de382fcfc619d), C64e(0x47c9f4477a7ab3c9),
  2066. C64e(0xacef8bacc8c827ef), C64e(0xe7326fe7baba8832),
  2067. C64e(0x2b7d642b32324f7d), C64e(0x95a4d795e6e642a4),
  2068. C64e(0xa0fb9ba0c0c03bfb), C64e(0x98b332981919aab3),
  2069. C64e(0xd16827d19e9ef668), C64e(0x7f815d7fa3a32281),
  2070. C64e(0x66aa88664444eeaa), C64e(0x7e82a87e5454d682),
  2071. C64e(0xabe676ab3b3bdde6), C64e(0x839e16830b0b959e),
  2072. C64e(0xca4503ca8c8cc945), C64e(0x297b9529c7c7bc7b),
  2073. C64e(0xd36ed6d36b6b056e), C64e(0x3c44503c28286c44),
  2074. C64e(0x798b5579a7a72c8b), C64e(0xe23d63e2bcbc813d),
  2075. C64e(0x1d272c1d16163127), C64e(0x769a4176adad379a),
  2076. C64e(0x3b4dad3bdbdb964d), C64e(0x56fac85664649efa),
  2077. C64e(0x4ed2e84e7474a6d2), C64e(0x1e22281e14143622),
  2078. C64e(0xdb763fdb9292e476), C64e(0x0a1e180a0c0c121e),
  2079. C64e(0x6cb4906c4848fcb4), C64e(0xe4376be4b8b88f37),
  2080. C64e(0x5de7255d9f9f78e7), C64e(0x6eb2616ebdbd0fb2),
  2081. C64e(0xef2a86ef4343692a), C64e(0xa6f193a6c4c435f1),
  2082. C64e(0xa8e372a83939dae3), C64e(0xa4f762a43131c6f7),
  2083. C64e(0x3759bd37d3d38a59), C64e(0x8b86ff8bf2f27486),
  2084. C64e(0x3256b132d5d58356), C64e(0x43c50d438b8b4ec5),
  2085. C64e(0x59ebdc596e6e85eb), C64e(0xb7c2afb7dada18c2),
  2086. C64e(0x8c8f028c01018e8f), C64e(0x64ac7964b1b11dac),
  2087. C64e(0xd26d23d29c9cf16d), C64e(0xe03b92e04949723b),
  2088. C64e(0xb4c7abb4d8d81fc7), C64e(0xfa1543faacacb915),
  2089. C64e(0x0709fd07f3f3fa09), C64e(0x256f8525cfcfa06f),
  2090. C64e(0xafea8fafcaca20ea), C64e(0x8e89f38ef4f47d89),
  2091. C64e(0xe9208ee947476720), C64e(0x1828201810103828),
  2092. C64e(0xd564ded56f6f0b64), C64e(0x8883fb88f0f07383),
  2093. C64e(0x6fb1946f4a4afbb1), C64e(0x7296b8725c5cca96),
  2094. C64e(0x246c70243838546c), C64e(0xf108aef157575f08),
  2095. C64e(0xc752e6c773732152), C64e(0x51f33551979764f3),
  2096. C64e(0x23658d23cbcbae65), C64e(0x7c84597ca1a12584),
  2097. C64e(0x9cbfcb9ce8e857bf), C64e(0x21637c213e3e5d63),
  2098. C64e(0xdd7c37dd9696ea7c), C64e(0xdc7fc2dc61611e7f),
  2099. C64e(0x86911a860d0d9c91), C64e(0x85941e850f0f9b94),
  2100. C64e(0x90abdb90e0e04bab), C64e(0x42c6f8427c7cbac6),
  2101. C64e(0xc457e2c471712657), C64e(0xaae583aacccc29e5),
  2102. C64e(0xd8733bd89090e373), C64e(0x050f0c050606090f),
  2103. C64e(0x0103f501f7f7f403), C64e(0x123638121c1c2a36),
  2104. C64e(0xa3fe9fa3c2c23cfe), C64e(0x5fe1d45f6a6a8be1),
  2105. C64e(0xf91047f9aeaebe10), C64e(0xd06bd2d06969026b),
  2106. C64e(0x91a82e911717bfa8), C64e(0x58e82958999971e8),
  2107. C64e(0x276974273a3a5369), C64e(0xb9d04eb92727f7d0),
  2108. C64e(0x3848a938d9d99148), C64e(0x1335cd13ebebde35),
  2109. C64e(0xb3ce56b32b2be5ce), C64e(0x3355443322227755),
  2110. C64e(0xbbd6bfbbd2d204d6), C64e(0x70904970a9a93990),
  2111. C64e(0x89800e8907078780), C64e(0xa7f266a73333c1f2),
  2112. C64e(0xb6c15ab62d2decc1), C64e(0x226678223c3c5a66),
  2113. C64e(0x92ad2a921515b8ad), C64e(0x20608920c9c9a960),
  2114. C64e(0x49db154987875cdb), C64e(0xff1a4fffaaaab01a),
  2115. C64e(0x7888a0785050d888), C64e(0x7a8e517aa5a52b8e),
  2116. C64e(0x8f8a068f0303898a), C64e(0xf813b2f859594a13),
  2117. C64e(0x809b12800909929b), C64e(0x173934171a1a2339),
  2118. C64e(0xda75cada65651075), C64e(0x3153b531d7d78453),
  2119. C64e(0xc65113c68484d551), C64e(0xb8d3bbb8d0d003d3),
  2120. C64e(0xc35e1fc38282dc5e), C64e(0xb0cb52b02929e2cb),
  2121. C64e(0x7799b4775a5ac399), C64e(0x11333c111e1e2d33),
  2122. C64e(0xcb46f6cb7b7b3d46), C64e(0xfc1f4bfca8a8b71f),
  2123. C64e(0xd661dad66d6d0c61), C64e(0x3a4e583a2c2c624e)
  2124. };
  2125.  
  2126. __constant const sph_u64 T6_C[] = {
  2127. C64e(0xf4a5f497a5c6c632), C64e(0x978497eb84f8f86f),
  2128. C64e(0xb099b0c799eeee5e), C64e(0x8c8d8cf78df6f67a),
  2129. C64e(0x170d17e50dffffe8), C64e(0xdcbddcb7bdd6d60a),
  2130. C64e(0xc8b1c8a7b1dede16), C64e(0xfc54fc395491916d),
  2131. C64e(0xf050f0c050606090), C64e(0x0503050403020207),
  2132. C64e(0xe0a9e087a9cece2e), C64e(0x877d87ac7d5656d1),
  2133. C64e(0x2b192bd519e7e7cc), C64e(0xa662a67162b5b513),
  2134. C64e(0x31e6319ae64d4d7c), C64e(0xb59ab5c39aecec59),
  2135. C64e(0xcf45cf05458f8f40), C64e(0xbc9dbc3e9d1f1fa3),
  2136. C64e(0xc040c00940898949), C64e(0x928792ef87fafa68),
  2137. C64e(0x3f153fc515efefd0), C64e(0x26eb267febb2b294),
  2138. C64e(0x40c94007c98e8ece), C64e(0x1d0b1ded0bfbfbe6),
  2139. C64e(0x2fec2f82ec41416e), C64e(0xa967a97d67b3b31a),
  2140. C64e(0x1cfd1cbefd5f5f43), C64e(0x25ea258aea454560),
  2141. C64e(0xdabfda46bf2323f9), C64e(0x02f702a6f7535351),
  2142. C64e(0xa196a1d396e4e445), C64e(0xed5bed2d5b9b9b76),
  2143. C64e(0x5dc25deac2757528), C64e(0x241c24d91ce1e1c5),
  2144. C64e(0xe9aee97aae3d3dd4), C64e(0xbe6abe986a4c4cf2),
  2145. C64e(0xee5aeed85a6c6c82), C64e(0xc341c3fc417e7ebd),
  2146. C64e(0x060206f102f5f5f3), C64e(0xd14fd11d4f838352),
  2147. C64e(0xe45ce4d05c68688c), C64e(0x07f407a2f4515156),
  2148. C64e(0x5c345cb934d1d18d), C64e(0x180818e908f9f9e1),
  2149. C64e(0xae93aedf93e2e24c), C64e(0x9573954d73abab3e),
  2150. C64e(0xf553f5c453626297), C64e(0x413f41543f2a2a6b),
  2151. C64e(0x140c14100c08081c), C64e(0xf652f63152959563),
  2152. C64e(0xaf65af8c654646e9), C64e(0xe25ee2215e9d9d7f),
  2153. C64e(0x7828786028303048), C64e(0xf8a1f86ea13737cf),
  2154. C64e(0x110f11140f0a0a1b), C64e(0xc4b5c45eb52f2feb),
  2155. C64e(0x1b091b1c090e0e15), C64e(0x5a365a483624247e),
  2156. C64e(0xb69bb6369b1b1bad), C64e(0x473d47a53ddfdf98),
  2157. C64e(0x6a266a8126cdcda7), C64e(0xbb69bb9c694e4ef5),
  2158. C64e(0x4ccd4cfecd7f7f33), C64e(0xba9fbacf9feaea50),
  2159. C64e(0x2d1b2d241b12123f), C64e(0xb99eb93a9e1d1da4),
  2160. C64e(0x9c749cb0745858c4), C64e(0x722e72682e343446),
  2161. C64e(0x772d776c2d363641), C64e(0xcdb2cda3b2dcdc11),
  2162. C64e(0x29ee2973eeb4b49d), C64e(0x16fb16b6fb5b5b4d),
  2163. C64e(0x01f60153f6a4a4a5), C64e(0xd74dd7ec4d7676a1),
  2164. C64e(0xa361a37561b7b714), C64e(0x49ce49face7d7d34),
  2165. C64e(0x8d7b8da47b5252df), C64e(0x423e42a13edddd9f),
  2166. C64e(0x937193bc715e5ecd), C64e(0xa297a226971313b1),
  2167. C64e(0x04f50457f5a6a6a2), C64e(0xb868b86968b9b901),
  2168. C64e(0x0000000000000000), C64e(0x742c74992cc1c1b5),
  2169. C64e(0xa060a080604040e0), C64e(0x211f21dd1fe3e3c2),
  2170. C64e(0x43c843f2c879793a), C64e(0x2ced2c77edb6b69a),
  2171. C64e(0xd9bed9b3bed4d40d), C64e(0xca46ca01468d8d47),
  2172. C64e(0x70d970ced9676717), C64e(0xdd4bdde44b7272af),
  2173. C64e(0x79de7933de9494ed), C64e(0x67d4672bd49898ff),
  2174. C64e(0x23e8237be8b0b093), C64e(0xde4ade114a85855b),
  2175. C64e(0xbd6bbd6d6bbbbb06), C64e(0x7e2a7e912ac5c5bb),
  2176. C64e(0x34e5349ee54f4f7b), C64e(0x3a163ac116ededd7),
  2177. C64e(0x54c55417c58686d2), C64e(0x62d7622fd79a9af8),
  2178. C64e(0xff55ffcc55666699), C64e(0xa794a722941111b6),
  2179. C64e(0x4acf4a0fcf8a8ac0), C64e(0x301030c910e9e9d9),
  2180. C64e(0x0a060a080604040e), C64e(0x988198e781fefe66),
  2181. C64e(0x0bf00b5bf0a0a0ab), C64e(0xcc44ccf0447878b4),
  2182. C64e(0xd5bad54aba2525f0), C64e(0x3ee33e96e34b4b75),
  2183. C64e(0x0ef30e5ff3a2a2ac), C64e(0x19fe19bafe5d5d44),
  2184. C64e(0x5bc05b1bc08080db), C64e(0x858a850a8a050580),
  2185. C64e(0xecadec7ead3f3fd3), C64e(0xdfbcdf42bc2121fe),
  2186. C64e(0xd848d8e0487070a8), C64e(0x0c040cf904f1f1fd),
  2187. C64e(0x7adf7ac6df636319), C64e(0x58c158eec177772f),
  2188. C64e(0x9f759f4575afaf30), C64e(0xa563a584634242e7),
  2189. C64e(0x5030504030202070), C64e(0x2e1a2ed11ae5e5cb),
  2190. C64e(0x120e12e10efdfdef), C64e(0xb76db7656dbfbf08),
  2191. C64e(0xd44cd4194c818155), C64e(0x3c143c3014181824),
  2192. C64e(0x5f355f4c35262679), C64e(0x712f719d2fc3c3b2),
  2193. C64e(0x38e13867e1bebe86), C64e(0xfda2fd6aa23535c8),
  2194. C64e(0x4fcc4f0bcc8888c7), C64e(0x4b394b5c392e2e65),
  2195. C64e(0xf957f93d5793936a), C64e(0x0df20daaf2555558),
  2196. C64e(0x9d829de382fcfc61), C64e(0xc947c9f4477a7ab3),
  2197. C64e(0xefacef8bacc8c827), C64e(0x32e7326fe7baba88),
  2198. C64e(0x7d2b7d642b32324f), C64e(0xa495a4d795e6e642),
  2199. C64e(0xfba0fb9ba0c0c03b), C64e(0xb398b332981919aa),
  2200. C64e(0x68d16827d19e9ef6), C64e(0x817f815d7fa3a322),
  2201. C64e(0xaa66aa88664444ee), C64e(0x827e82a87e5454d6),
  2202. C64e(0xe6abe676ab3b3bdd), C64e(0x9e839e16830b0b95),
  2203. C64e(0x45ca4503ca8c8cc9), C64e(0x7b297b9529c7c7bc),
  2204. C64e(0x6ed36ed6d36b6b05), C64e(0x443c44503c28286c),
  2205. C64e(0x8b798b5579a7a72c), C64e(0x3de23d63e2bcbc81),
  2206. C64e(0x271d272c1d161631), C64e(0x9a769a4176adad37),
  2207. C64e(0x4d3b4dad3bdbdb96), C64e(0xfa56fac85664649e),
  2208. C64e(0xd24ed2e84e7474a6), C64e(0x221e22281e141436),
  2209. C64e(0x76db763fdb9292e4), C64e(0x1e0a1e180a0c0c12),
  2210. C64e(0xb46cb4906c4848fc), C64e(0x37e4376be4b8b88f),
  2211. C64e(0xe75de7255d9f9f78), C64e(0xb26eb2616ebdbd0f),
  2212. C64e(0x2aef2a86ef434369), C64e(0xf1a6f193a6c4c435),
  2213. C64e(0xe3a8e372a83939da), C64e(0xf7a4f762a43131c6),
  2214. C64e(0x593759bd37d3d38a), C64e(0x868b86ff8bf2f274),
  2215. C64e(0x563256b132d5d583), C64e(0xc543c50d438b8b4e),
  2216. C64e(0xeb59ebdc596e6e85), C64e(0xc2b7c2afb7dada18),
  2217. C64e(0x8f8c8f028c01018e), C64e(0xac64ac7964b1b11d),
  2218. C64e(0x6dd26d23d29c9cf1), C64e(0x3be03b92e0494972),
  2219. C64e(0xc7b4c7abb4d8d81f), C64e(0x15fa1543faacacb9),
  2220. C64e(0x090709fd07f3f3fa), C64e(0x6f256f8525cfcfa0),
  2221. C64e(0xeaafea8fafcaca20), C64e(0x898e89f38ef4f47d),
  2222. C64e(0x20e9208ee9474767), C64e(0x2818282018101038),
  2223. C64e(0x64d564ded56f6f0b), C64e(0x838883fb88f0f073),
  2224. C64e(0xb16fb1946f4a4afb), C64e(0x967296b8725c5cca),
  2225. C64e(0x6c246c7024383854), C64e(0x08f108aef157575f),
  2226. C64e(0x52c752e6c7737321), C64e(0xf351f33551979764),
  2227. C64e(0x6523658d23cbcbae), C64e(0x847c84597ca1a125),
  2228. C64e(0xbf9cbfcb9ce8e857), C64e(0x6321637c213e3e5d),
  2229. C64e(0x7cdd7c37dd9696ea), C64e(0x7fdc7fc2dc61611e),
  2230. C64e(0x9186911a860d0d9c), C64e(0x9485941e850f0f9b),
  2231. C64e(0xab90abdb90e0e04b), C64e(0xc642c6f8427c7cba),
  2232. C64e(0x57c457e2c4717126), C64e(0xe5aae583aacccc29),
  2233. C64e(0x73d8733bd89090e3), C64e(0x0f050f0c05060609),
  2234. C64e(0x030103f501f7f7f4), C64e(0x36123638121c1c2a),
  2235. C64e(0xfea3fe9fa3c2c23c), C64e(0xe15fe1d45f6a6a8b),
  2236. C64e(0x10f91047f9aeaebe), C64e(0x6bd06bd2d0696902),
  2237. C64e(0xa891a82e911717bf), C64e(0xe858e82958999971),
  2238. C64e(0x69276974273a3a53), C64e(0xd0b9d04eb92727f7),
  2239. C64e(0x483848a938d9d991), C64e(0x351335cd13ebebde),
  2240. C64e(0xceb3ce56b32b2be5), C64e(0x5533554433222277),
  2241. C64e(0xd6bbd6bfbbd2d204), C64e(0x9070904970a9a939),
  2242. C64e(0x8089800e89070787), C64e(0xf2a7f266a73333c1),
  2243. C64e(0xc1b6c15ab62d2dec), C64e(0x66226678223c3c5a),
  2244. C64e(0xad92ad2a921515b8), C64e(0x6020608920c9c9a9),
  2245. C64e(0xdb49db154987875c), C64e(0x1aff1a4fffaaaab0),
  2246. C64e(0x887888a0785050d8), C64e(0x8e7a8e517aa5a52b),
  2247. C64e(0x8a8f8a068f030389), C64e(0x13f813b2f859594a),
  2248. C64e(0x9b809b1280090992), C64e(0x39173934171a1a23),
  2249. C64e(0x75da75cada656510), C64e(0x533153b531d7d784),
  2250. C64e(0x51c65113c68484d5), C64e(0xd3b8d3bbb8d0d003),
  2251. C64e(0x5ec35e1fc38282dc), C64e(0xcbb0cb52b02929e2),
  2252. C64e(0x997799b4775a5ac3), C64e(0x3311333c111e1e2d),
  2253. C64e(0x46cb46f6cb7b7b3d), C64e(0x1ffc1f4bfca8a8b7),
  2254. C64e(0x61d661dad66d6d0c), C64e(0x4e3a4e583a2c2c62)
  2255. };
  2256.  
  2257. __constant const sph_u64 T7_C[] = {
  2258. C64e(0x32f4a5f497a5c6c6), C64e(0x6f978497eb84f8f8),
  2259. C64e(0x5eb099b0c799eeee), C64e(0x7a8c8d8cf78df6f6),
  2260. C64e(0xe8170d17e50dffff), C64e(0x0adcbddcb7bdd6d6),
  2261. C64e(0x16c8b1c8a7b1dede), C64e(0x6dfc54fc39549191),
  2262. C64e(0x90f050f0c0506060), C64e(0x0705030504030202),
  2263. C64e(0x2ee0a9e087a9cece), C64e(0xd1877d87ac7d5656),
  2264. C64e(0xcc2b192bd519e7e7), C64e(0x13a662a67162b5b5),
  2265. C64e(0x7c31e6319ae64d4d), C64e(0x59b59ab5c39aecec),
  2266. C64e(0x40cf45cf05458f8f), C64e(0xa3bc9dbc3e9d1f1f),
  2267. C64e(0x49c040c009408989), C64e(0x68928792ef87fafa),
  2268. C64e(0xd03f153fc515efef), C64e(0x9426eb267febb2b2),
  2269. C64e(0xce40c94007c98e8e), C64e(0xe61d0b1ded0bfbfb),
  2270. C64e(0x6e2fec2f82ec4141), C64e(0x1aa967a97d67b3b3),
  2271. C64e(0x431cfd1cbefd5f5f), C64e(0x6025ea258aea4545),
  2272. C64e(0xf9dabfda46bf2323), C64e(0x5102f702a6f75353),
  2273. C64e(0x45a196a1d396e4e4), C64e(0x76ed5bed2d5b9b9b),
  2274. C64e(0x285dc25deac27575), C64e(0xc5241c24d91ce1e1),
  2275. C64e(0xd4e9aee97aae3d3d), C64e(0xf2be6abe986a4c4c),
  2276. C64e(0x82ee5aeed85a6c6c), C64e(0xbdc341c3fc417e7e),
  2277. C64e(0xf3060206f102f5f5), C64e(0x52d14fd11d4f8383),
  2278. C64e(0x8ce45ce4d05c6868), C64e(0x5607f407a2f45151),
  2279. C64e(0x8d5c345cb934d1d1), C64e(0xe1180818e908f9f9),
  2280. C64e(0x4cae93aedf93e2e2), C64e(0x3e9573954d73abab),
  2281. C64e(0x97f553f5c4536262), C64e(0x6b413f41543f2a2a),
  2282. C64e(0x1c140c14100c0808), C64e(0x63f652f631529595),
  2283. C64e(0xe9af65af8c654646), C64e(0x7fe25ee2215e9d9d),
  2284. C64e(0x4878287860283030), C64e(0xcff8a1f86ea13737),
  2285. C64e(0x1b110f11140f0a0a), C64e(0xebc4b5c45eb52f2f),
  2286. C64e(0x151b091b1c090e0e), C64e(0x7e5a365a48362424),
  2287. C64e(0xadb69bb6369b1b1b), C64e(0x98473d47a53ddfdf),
  2288. C64e(0xa76a266a8126cdcd), C64e(0xf5bb69bb9c694e4e),
  2289. C64e(0x334ccd4cfecd7f7f), C64e(0x50ba9fbacf9feaea),
  2290. C64e(0x3f2d1b2d241b1212), C64e(0xa4b99eb93a9e1d1d),
  2291. C64e(0xc49c749cb0745858), C64e(0x46722e72682e3434),
  2292. C64e(0x41772d776c2d3636), C64e(0x11cdb2cda3b2dcdc),
  2293. C64e(0x9d29ee2973eeb4b4), C64e(0x4d16fb16b6fb5b5b),
  2294. C64e(0xa501f60153f6a4a4), C64e(0xa1d74dd7ec4d7676),
  2295. C64e(0x14a361a37561b7b7), C64e(0x3449ce49face7d7d),
  2296. C64e(0xdf8d7b8da47b5252), C64e(0x9f423e42a13edddd),
  2297. C64e(0xcd937193bc715e5e), C64e(0xb1a297a226971313),
  2298. C64e(0xa204f50457f5a6a6), C64e(0x01b868b86968b9b9),
  2299. C64e(0x0000000000000000), C64e(0xb5742c74992cc1c1),
  2300. C64e(0xe0a060a080604040), C64e(0xc2211f21dd1fe3e3),
  2301. C64e(0x3a43c843f2c87979), C64e(0x9a2ced2c77edb6b6),
  2302. C64e(0x0dd9bed9b3bed4d4), C64e(0x47ca46ca01468d8d),
  2303. C64e(0x1770d970ced96767), C64e(0xafdd4bdde44b7272),
  2304. C64e(0xed79de7933de9494), C64e(0xff67d4672bd49898),
  2305. C64e(0x9323e8237be8b0b0), C64e(0x5bde4ade114a8585),
  2306. C64e(0x06bd6bbd6d6bbbbb), C64e(0xbb7e2a7e912ac5c5),
  2307. C64e(0x7b34e5349ee54f4f), C64e(0xd73a163ac116eded),
  2308. C64e(0xd254c55417c58686), C64e(0xf862d7622fd79a9a),
  2309. C64e(0x99ff55ffcc556666), C64e(0xb6a794a722941111),
  2310. C64e(0xc04acf4a0fcf8a8a), C64e(0xd9301030c910e9e9),
  2311. C64e(0x0e0a060a08060404), C64e(0x66988198e781fefe),
  2312. C64e(0xab0bf00b5bf0a0a0), C64e(0xb4cc44ccf0447878),
  2313. C64e(0xf0d5bad54aba2525), C64e(0x753ee33e96e34b4b),
  2314. C64e(0xac0ef30e5ff3a2a2), C64e(0x4419fe19bafe5d5d),
  2315. C64e(0xdb5bc05b1bc08080), C64e(0x80858a850a8a0505),
  2316. C64e(0xd3ecadec7ead3f3f), C64e(0xfedfbcdf42bc2121),
  2317. C64e(0xa8d848d8e0487070), C64e(0xfd0c040cf904f1f1),
  2318. C64e(0x197adf7ac6df6363), C64e(0x2f58c158eec17777),
  2319. C64e(0x309f759f4575afaf), C64e(0xe7a563a584634242),
  2320. C64e(0x7050305040302020), C64e(0xcb2e1a2ed11ae5e5),
  2321. C64e(0xef120e12e10efdfd), C64e(0x08b76db7656dbfbf),
  2322. C64e(0x55d44cd4194c8181), C64e(0x243c143c30141818),
  2323. C64e(0x795f355f4c352626), C64e(0xb2712f719d2fc3c3),
  2324. C64e(0x8638e13867e1bebe), C64e(0xc8fda2fd6aa23535),
  2325. C64e(0xc74fcc4f0bcc8888), C64e(0x654b394b5c392e2e),
  2326. C64e(0x6af957f93d579393), C64e(0x580df20daaf25555),
  2327. C64e(0x619d829de382fcfc), C64e(0xb3c947c9f4477a7a),
  2328. C64e(0x27efacef8bacc8c8), C64e(0x8832e7326fe7baba),
  2329. C64e(0x4f7d2b7d642b3232), C64e(0x42a495a4d795e6e6),
  2330. C64e(0x3bfba0fb9ba0c0c0), C64e(0xaab398b332981919),
  2331. C64e(0xf668d16827d19e9e), C64e(0x22817f815d7fa3a3),
  2332. C64e(0xeeaa66aa88664444), C64e(0xd6827e82a87e5454),
  2333. C64e(0xdde6abe676ab3b3b), C64e(0x959e839e16830b0b),
  2334. C64e(0xc945ca4503ca8c8c), C64e(0xbc7b297b9529c7c7),
  2335. C64e(0x056ed36ed6d36b6b), C64e(0x6c443c44503c2828),
  2336. C64e(0x2c8b798b5579a7a7), C64e(0x813de23d63e2bcbc),
  2337. C64e(0x31271d272c1d1616), C64e(0x379a769a4176adad),
  2338. C64e(0x964d3b4dad3bdbdb), C64e(0x9efa56fac8566464),
  2339. C64e(0xa6d24ed2e84e7474), C64e(0x36221e22281e1414),
  2340. C64e(0xe476db763fdb9292), C64e(0x121e0a1e180a0c0c),
  2341. C64e(0xfcb46cb4906c4848), C64e(0x8f37e4376be4b8b8),
  2342. C64e(0x78e75de7255d9f9f), C64e(0x0fb26eb2616ebdbd),
  2343. C64e(0x692aef2a86ef4343), C64e(0x35f1a6f193a6c4c4),
  2344. C64e(0xdae3a8e372a83939), C64e(0xc6f7a4f762a43131),
  2345. C64e(0x8a593759bd37d3d3), C64e(0x74868b86ff8bf2f2),
  2346. C64e(0x83563256b132d5d5), C64e(0x4ec543c50d438b8b),
  2347. C64e(0x85eb59ebdc596e6e), C64e(0x18c2b7c2afb7dada),
  2348. C64e(0x8e8f8c8f028c0101), C64e(0x1dac64ac7964b1b1),
  2349. C64e(0xf16dd26d23d29c9c), C64e(0x723be03b92e04949),
  2350. C64e(0x1fc7b4c7abb4d8d8), C64e(0xb915fa1543faacac),
  2351. C64e(0xfa090709fd07f3f3), C64e(0xa06f256f8525cfcf),
  2352. C64e(0x20eaafea8fafcaca), C64e(0x7d898e89f38ef4f4),
  2353. C64e(0x6720e9208ee94747), C64e(0x3828182820181010),
  2354. C64e(0x0b64d564ded56f6f), C64e(0x73838883fb88f0f0),
  2355. C64e(0xfbb16fb1946f4a4a), C64e(0xca967296b8725c5c),
  2356. C64e(0x546c246c70243838), C64e(0x5f08f108aef15757),
  2357. C64e(0x2152c752e6c77373), C64e(0x64f351f335519797),
  2358. C64e(0xae6523658d23cbcb), C64e(0x25847c84597ca1a1),
  2359. C64e(0x57bf9cbfcb9ce8e8), C64e(0x5d6321637c213e3e),
  2360. C64e(0xea7cdd7c37dd9696), C64e(0x1e7fdc7fc2dc6161),
  2361. C64e(0x9c9186911a860d0d), C64e(0x9b9485941e850f0f),
  2362. C64e(0x4bab90abdb90e0e0), C64e(0xbac642c6f8427c7c),
  2363. C64e(0x2657c457e2c47171), C64e(0x29e5aae583aacccc),
  2364. C64e(0xe373d8733bd89090), C64e(0x090f050f0c050606),
  2365. C64e(0xf4030103f501f7f7), C64e(0x2a36123638121c1c),
  2366. C64e(0x3cfea3fe9fa3c2c2), C64e(0x8be15fe1d45f6a6a),
  2367. C64e(0xbe10f91047f9aeae), C64e(0x026bd06bd2d06969),
  2368. C64e(0xbfa891a82e911717), C64e(0x71e858e829589999),
  2369. C64e(0x5369276974273a3a), C64e(0xf7d0b9d04eb92727),
  2370. C64e(0x91483848a938d9d9), C64e(0xde351335cd13ebeb),
  2371. C64e(0xe5ceb3ce56b32b2b), C64e(0x7755335544332222),
  2372. C64e(0x04d6bbd6bfbbd2d2), C64e(0x399070904970a9a9),
  2373. C64e(0x878089800e890707), C64e(0xc1f2a7f266a73333),
  2374. C64e(0xecc1b6c15ab62d2d), C64e(0x5a66226678223c3c),
  2375. C64e(0xb8ad92ad2a921515), C64e(0xa96020608920c9c9),
  2376. C64e(0x5cdb49db15498787), C64e(0xb01aff1a4fffaaaa),
  2377. C64e(0xd8887888a0785050), C64e(0x2b8e7a8e517aa5a5),
  2378. C64e(0x898a8f8a068f0303), C64e(0x4a13f813b2f85959),
  2379. C64e(0x929b809b12800909), C64e(0x2339173934171a1a),
  2380. C64e(0x1075da75cada6565), C64e(0x84533153b531d7d7),
  2381. C64e(0xd551c65113c68484), C64e(0x03d3b8d3bbb8d0d0),
  2382. C64e(0xdc5ec35e1fc38282), C64e(0xe2cbb0cb52b02929),
  2383. C64e(0xc3997799b4775a5a), C64e(0x2d3311333c111e1e),
  2384. C64e(0x3d46cb46f6cb7b7b), C64e(0xb71ffc1f4bfca8a8),
  2385. C64e(0x0c61d661dad66d6d), C64e(0x624e3a4e583a2c2c)
  2386. };
  2387.  
  2388. #endif
  2389.  
  2390. #if SPH_SMALL_FOOTPRINT_GROESTL
  2391.  
  2392. #define RBTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
  2393. t[d] = T0[B64_0(a[b0])] \
  2394. ^ R64(T0[B64_1(a[b1])], 8) \
  2395. ^ R64(T0[B64_2(a[b2])], 16) \
  2396. ^ R64(T0[B64_3(a[b3])], 24) \
  2397. ^ T4[B64_4(a[b4])] \
  2398. ^ R64(T4[B64_5(a[b5])], 8) \
  2399. ^ R64(T4[B64_6(a[b6])], 16) \
  2400. ^ R64(T4[B64_7(a[b7])], 24); \
  2401. } while (0)
  2402.  
  2403. #else
  2404.  
  2405. #define RBTT(d, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \
  2406. t[d] = T0[B64_0(a[b0])] \
  2407. ^ T1[B64_1(a[b1])] \
  2408. ^ T2[B64_2(a[b2])] \
  2409. ^ T3[B64_3(a[b3])] \
  2410. ^ T4[B64_4(a[b4])] \
  2411. ^ T5[B64_5(a[b5])] \
  2412. ^ T6[B64_6(a[b6])] \
  2413. ^ T7[B64_7(a[b7])]; \
  2414. } while (0)
  2415.  
  2416. #endif
  2417.  
  2418. #if SPH_SMALL_FOOTPRINT_GROESTL
  2419.  
  2420. #define ROUND_BIG_P(a, r) do { \
  2421. sph_u64 t[16]; \
  2422. size_t u; \
  2423. a[0x0] ^= PC64(0x00, r); \
  2424. a[0x1] ^= PC64(0x10, r); \
  2425. a[0x2] ^= PC64(0x20, r); \
  2426. a[0x3] ^= PC64(0x30, r); \
  2427. a[0x4] ^= PC64(0x40, r); \
  2428. a[0x5] ^= PC64(0x50, r); \
  2429. a[0x6] ^= PC64(0x60, r); \
  2430. a[0x7] ^= PC64(0x70, r); \
  2431. a[0x8] ^= PC64(0x80, r); \
  2432. a[0x9] ^= PC64(0x90, r); \
  2433. a[0xA] ^= PC64(0xA0, r); \
  2434. a[0xB] ^= PC64(0xB0, r); \
  2435. a[0xC] ^= PC64(0xC0, r); \
  2436. a[0xD] ^= PC64(0xD0, r); \
  2437. a[0xE] ^= PC64(0xE0, r); \
  2438. a[0xF] ^= PC64(0xF0, r); \
  2439. for (u = 0; u < 16; u += 4) { \
  2440. RBTT(u + 0, a, u + 0, (u + 1) & 0xF, \
  2441. (u + 2) & 0xF, (u + 3) & 0xF, (u + 4) & 0xF, \
  2442. (u + 5) & 0xF, (u + 6) & 0xF, (u + 11) & 0xF); \
  2443. RBTT(u + 1, a, u + 1, (u + 2) & 0xF, \
  2444. (u + 3) & 0xF, (u + 4) & 0xF, (u + 5) & 0xF, \
  2445. (u + 6) & 0xF, (u + 7) & 0xF, (u + 12) & 0xF); \
  2446. RBTT(u + 2, a, u + 2, (u + 3) & 0xF, \
  2447. (u + 4) & 0xF, (u + 5) & 0xF, (u + 6) & 0xF, \
  2448. (u + 7) & 0xF, (u + 8) & 0xF, (u + 13) & 0xF); \
  2449. RBTT(u + 3, a, u + 3, (u + 4) & 0xF, \
  2450. (u + 5) & 0xF, (u + 6) & 0xF, (u + 7) & 0xF, \
  2451. (u + 8) & 0xF, (u + 9) & 0xF, (u + 14) & 0xF); \
  2452. } \
  2453. a[0x0] = t[0x0]; \
  2454. a[0x1] = t[0x1]; \
  2455. a[0x2] = t[0x2]; \
  2456. a[0x3] = t[0x3]; \
  2457. a[0x4] = t[0x4]; \
  2458. a[0x5] = t[0x5]; \
  2459. a[0x6] = t[0x6]; \
  2460. a[0x7] = t[0x7]; \
  2461. a[0x8] = t[0x8]; \
  2462. a[0x9] = t[0x9]; \
  2463. a[0xA] = t[0xA]; \
  2464. a[0xB] = t[0xB]; \
  2465. a[0xC] = t[0xC]; \
  2466. a[0xD] = t[0xD]; \
  2467. a[0xE] = t[0xE]; \
  2468. a[0xF] = t[0xF]; \
  2469. } while (0)
  2470.  
  2471. #define ROUND_BIG_Q(a, r) do { \
  2472. sph_u64 t[16]; \
  2473. size_t u; \
  2474. a[0x0] ^= QC64(0x00, r); \
  2475. a[0x1] ^= QC64(0x10, r); \
  2476. a[0x2] ^= QC64(0x20, r); \
  2477. a[0x3] ^= QC64(0x30, r); \
  2478. a[0x4] ^= QC64(0x40, r); \
  2479. a[0x5] ^= QC64(0x50, r); \
  2480. a[0x6] ^= QC64(0x60, r); \
  2481. a[0x7] ^= QC64(0x70, r); \
  2482. a[0x8] ^= QC64(0x80, r); \
  2483. a[0x9] ^= QC64(0x90, r); \
  2484. a[0xA] ^= QC64(0xA0, r); \
  2485. a[0xB] ^= QC64(0xB0, r); \
  2486. a[0xC] ^= QC64(0xC0, r); \
  2487. a[0xD] ^= QC64(0xD0, r); \
  2488. a[0xE] ^= QC64(0xE0, r); \
  2489. a[0xF] ^= QC64(0xF0, r); \
  2490. for (u = 0; u < 16; u += 4) { \
  2491. RBTT(u + 0, a, (u + 1) & 0xF, (u + 3) & 0xF, \
  2492. (u + 5) & 0xF, (u + 11) & 0xF, (u + 0) & 0xF, \
  2493. (u + 2) & 0xF, (u + 4) & 0xF, (u + 6) & 0xF); \
  2494. RBTT(u + 1, a, (u + 2) & 0xF, (u + 4) & 0xF, \
  2495. (u + 6) & 0xF, (u + 12) & 0xF, (u + 1) & 0xF, \
  2496. (u + 3) & 0xF, (u + 5) & 0xF, (u + 7) & 0xF); \
  2497. RBTT(u + 2, a, (u + 3) & 0xF, (u + 5) & 0xF, \
  2498. (u + 7) & 0xF, (u + 13) & 0xF, (u + 2) & 0xF, \
  2499. (u + 4) & 0xF, (u + 6) & 0xF, (u + 8) & 0xF); \
  2500. RBTT(u + 3, a, (u + 4) & 0xF, (u + 6) & 0xF, \
  2501. (u + 8) & 0xF, (u + 14) & 0xF, (u + 3) & 0xF, \
  2502. (u + 5) & 0xF, (u + 7) & 0xF, (u + 9) & 0xF); \
  2503. } \
  2504. a[0x0] = t[0x0]; \
  2505. a[0x1] = t[0x1]; \
  2506. a[0x2] = t[0x2]; \
  2507. a[0x3] = t[0x3]; \
  2508. a[0x4] = t[0x4]; \
  2509. a[0x5] = t[0x5]; \
  2510. a[0x6] = t[0x6]; \
  2511. a[0x7] = t[0x7]; \
  2512. a[0x8] = t[0x8]; \
  2513. a[0x9] = t[0x9]; \
  2514. a[0xA] = t[0xA]; \
  2515. a[0xB] = t[0xB]; \
  2516. a[0xC] = t[0xC]; \
  2517. a[0xD] = t[0xD]; \
  2518. a[0xE] = t[0xE]; \
  2519. a[0xF] = t[0xF]; \
  2520. } while (0)
  2521.  
  2522. #else
  2523.  
  2524. #define ROUND_BIG_P(a, r) do { \
  2525. sph_u64 t[16]; \
  2526. a[0x0] ^= PC64(0x00, r); \
  2527. a[0x1] ^= PC64(0x10, r); \
  2528. a[0x2] ^= PC64(0x20, r); \
  2529. a[0x3] ^= PC64(0x30, r); \
  2530. a[0x4] ^= PC64(0x40, r); \
  2531. a[0x5] ^= PC64(0x50, r); \
  2532. a[0x6] ^= PC64(0x60, r); \
  2533. a[0x7] ^= PC64(0x70, r); \
  2534. a[0x8] ^= PC64(0x80, r); \
  2535. a[0x9] ^= PC64(0x90, r); \
  2536. a[0xA] ^= PC64(0xA0, r); \
  2537. a[0xB] ^= PC64(0xB0, r); \
  2538. a[0xC] ^= PC64(0xC0, r); \
  2539. a[0xD] ^= PC64(0xD0, r); \
  2540. a[0xE] ^= PC64(0xE0, r); \
  2541. a[0xF] ^= PC64(0xF0, r); \
  2542. RBTT(0x0, a, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xB); \
  2543. RBTT(0x1, a, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xC); \
  2544. RBTT(0x2, a, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0xD); \
  2545. RBTT(0x3, a, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xE); \
  2546. RBTT(0x4, a, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xF); \
  2547. RBTT(0x5, a, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0x0); \
  2548. RBTT(0x6, a, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0x1); \
  2549. RBTT(0x7, a, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0x2); \
  2550. RBTT(0x8, a, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0x3); \
  2551. RBTT(0x9, a, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x4); \
  2552. RBTT(0xA, a, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x5); \
  2553. RBTT(0xB, a, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x6); \
  2554. RBTT(0xC, a, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x7); \
  2555. RBTT(0xD, a, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x8); \
  2556. RBTT(0xE, a, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x9); \
  2557. RBTT(0xF, a, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xA); \
  2558. a[0x0] = t[0x0]; \
  2559. a[0x1] = t[0x1]; \
  2560. a[0x2] = t[0x2]; \
  2561. a[0x3] = t[0x3]; \
  2562. a[0x4] = t[0x4]; \
  2563. a[0x5] = t[0x5]; \
  2564. a[0x6] = t[0x6]; \
  2565. a[0x7] = t[0x7]; \
  2566. a[0x8] = t[0x8]; \
  2567. a[0x9] = t[0x9]; \
  2568. a[0xA] = t[0xA]; \
  2569. a[0xB] = t[0xB]; \
  2570. a[0xC] = t[0xC]; \
  2571. a[0xD] = t[0xD]; \
  2572. a[0xE] = t[0xE]; \
  2573. a[0xF] = t[0xF]; \
  2574. } while (0)
  2575.  
  2576. #define ROUND_BIG_Q(a, r) do { \
  2577. sph_u64 t[16]; \
  2578. a[0x0] ^= QC64(0x00, r); \
  2579. a[0x1] ^= QC64(0x10, r); \
  2580. a[0x2] ^= QC64(0x20, r); \
  2581. a[0x3] ^= QC64(0x30, r); \
  2582. a[0x4] ^= QC64(0x40, r); \
  2583. a[0x5] ^= QC64(0x50, r); \
  2584. a[0x6] ^= QC64(0x60, r); \
  2585. a[0x7] ^= QC64(0x70, r); \
  2586. a[0x8] ^= QC64(0x80, r); \
  2587. a[0x9] ^= QC64(0x90, r); \
  2588. a[0xA] ^= QC64(0xA0, r); \
  2589. a[0xB] ^= QC64(0xB0, r); \
  2590. a[0xC] ^= QC64(0xC0, r); \
  2591. a[0xD] ^= QC64(0xD0, r); \
  2592. a[0xE] ^= QC64(0xE0, r); \
  2593. a[0xF] ^= QC64(0xF0, r); \
  2594. RBTT(0x0, a, 0x1, 0x3, 0x5, 0xB, 0x0, 0x2, 0x4, 0x6); \
  2595. RBTT(0x1, a, 0x2, 0x4, 0x6, 0xC, 0x1, 0x3, 0x5, 0x7); \
  2596. RBTT(0x2, a, 0x3, 0x5, 0x7, 0xD, 0x2, 0x4, 0x6, 0x8); \
  2597. RBTT(0x3, a, 0x4, 0x6, 0x8, 0xE, 0x3, 0x5, 0x7, 0x9); \
  2598. RBTT(0x4, a, 0x5, 0x7, 0x9, 0xF, 0x4, 0x6, 0x8, 0xA); \
  2599. RBTT(0x5, a, 0x6, 0x8, 0xA, 0x0, 0x5, 0x7, 0x9, 0xB); \
  2600. RBTT(0x6, a, 0x7, 0x9, 0xB, 0x1, 0x6, 0x8, 0xA, 0xC); \
  2601. RBTT(0x7, a, 0x8, 0xA, 0xC, 0x2, 0x7, 0x9, 0xB, 0xD); \
  2602. RBTT(0x8, a, 0x9, 0xB, 0xD, 0x3, 0x8, 0xA, 0xC, 0xE); \
  2603. RBTT(0x9, a, 0xA, 0xC, 0xE, 0x4, 0x9, 0xB, 0xD, 0xF); \
  2604. RBTT(0xA, a, 0xB, 0xD, 0xF, 0x5, 0xA, 0xC, 0xE, 0x0); \
  2605. RBTT(0xB, a, 0xC, 0xE, 0x0, 0x6, 0xB, 0xD, 0xF, 0x1); \
  2606. RBTT(0xC, a, 0xD, 0xF, 0x1, 0x7, 0xC, 0xE, 0x0, 0x2); \
  2607. RBTT(0xD, a, 0xE, 0x0, 0x2, 0x8, 0xD, 0xF, 0x1, 0x3); \
  2608. RBTT(0xE, a, 0xF, 0x1, 0x3, 0x9, 0xE, 0x0, 0x2, 0x4); \
  2609. RBTT(0xF, a, 0x0, 0x2, 0x4, 0xA, 0xF, 0x1, 0x3, 0x5); \
  2610. a[0x0] = t[0x0]; \
  2611. a[0x1] = t[0x1]; \
  2612. a[0x2] = t[0x2]; \
  2613. a[0x3] = t[0x3]; \
  2614. a[0x4] = t[0x4]; \
  2615. a[0x5] = t[0x5]; \
  2616. a[0x6] = t[0x6]; \
  2617. a[0x7] = t[0x7]; \
  2618. a[0x8] = t[0x8]; \
  2619. a[0x9] = t[0x9]; \
  2620. a[0xA] = t[0xA]; \
  2621. a[0xB] = t[0xB]; \
  2622. a[0xC] = t[0xC]; \
  2623. a[0xD] = t[0xD]; \
  2624. a[0xE] = t[0xE]; \
  2625. a[0xF] = t[0xF]; \
  2626. } while (0)
  2627.  
  2628. #endif
  2629.  
  2630. #define PERM_BIG_P(a) do { \
  2631. int r; \
  2632. for (r = 0; r < 14; r += 2) { \
  2633. ROUND_BIG_P(a, r + 0); \
  2634. ROUND_BIG_P(a, r + 1); \
  2635. } \
  2636. } while (0)
  2637.  
  2638. #define PERM_BIG_Q(a) do { \
  2639. int r; \
  2640. for (r = 0; r < 14; r += 2) { \
  2641. ROUND_BIG_Q(a, r + 0); \
  2642. ROUND_BIG_Q(a, r + 1); \
  2643. } \
  2644. } while (0)
  2645.  
  2646. /* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
  2647. /*
  2648. * JH implementation.
  2649. *
  2650. * ==========================(LICENSE BEGIN)============================
  2651. *
  2652. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  2653. *
  2654. * Permission is hereby granted, free of charge, to any person obtaining
  2655. * a copy of this software and associated documentation files (the
  2656. * "Software"), to deal in the Software without restriction, including
  2657. * without limitation the rights to use, copy, modify, merge, publish,
  2658. * distribute, sublicense, and/or sell copies of the Software, and to
  2659. * permit persons to whom the Software is furnished to do so, subject to
  2660. * the following conditions:
  2661. *
  2662. * The above copyright notice and this permission notice shall be
  2663. * included in all copies or substantial portions of the Software.
  2664. *
  2665. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  2666. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  2667. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  2668. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  2669. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  2670. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  2671. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  2672. *
  2673. * ===========================(LICENSE END)=============================
  2674. *
  2675. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  2676. */
  2677.  
  2678. #if !defined SPH_JH_64 && SPH_64_TRUE
  2679. #define SPH_JH_64 1
  2680. #endif
  2681.  
  2682. /*
  2683. * The internal bitslice representation may use either big-endian or
  2684. * little-endian (true bitslice operations do not care about the bit
  2685. * ordering, and the bit-swapping linear operations in JH happen to
  2686. * be invariant through endianness-swapping). The constants must be
  2687. * defined according to the chosen endianness; we use some
  2688. * byte-swapping macros for that.
  2689. */
  2690.  
  2691. #if SPH_LITTLE_ENDIAN
  2692.  
  2693. #define C32e(x) ((SPH_C32(x) >> 24) \
  2694. | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
  2695. | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
  2696. | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
  2697. #define dec32e_aligned sph_dec32le_aligned
  2698. #define enc32e sph_enc32le
  2699.  
  2700. #define C64e(x) ((SPH_C64(x) >> 56) \
  2701. | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
  2702. | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
  2703. | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
  2704. | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
  2705. | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
  2706. | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
  2707. | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
  2708. #define dec64e_aligned sph_dec64le_aligned
  2709. #define enc64e sph_enc64le
  2710.  
  2711. #else
  2712.  
  2713. #define C32e(x) SPH_C32(x)
  2714. #define dec32e_aligned sph_dec32be_aligned
  2715. #define enc32e sph_enc32be
  2716. #define C64e(x) SPH_C64(x)
  2717. #define dec64e_aligned sph_dec64be_aligned
  2718. #define enc64e sph_enc64be
  2719.  
  2720. #endif
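/*
 * Illustrative sketch (guarded out, not part of the kernel): on a
 * little-endian build the C64e() macro above byte-swaps its 64-bit
 * constant so that the most significant byte of the written value ends
 * up in the lowest-order byte; on a big-endian build it is the identity.
 * The value used below is hypothetical and only demonstrates the swap.
 */
#if 0
static void c64e_example(void)
{
	/* 0x0123456789ABCDEF written big-endian style becomes
	   0xEFCDAB8967452301 after the shift-and-mask swap. */
	sph_u64 v = C64e(0x0123456789ABCDEF);
	(void)v;
}
#endif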
  2721.  
  2722. #define Sb(x0, x1, x2, x3, c) do { \
  2723. x3 = ~x3; \
  2724. x0 ^= (c) & ~x2; \
  2725. tmp = (c) ^ (x0 & x1); \
  2726. x0 ^= x2 & x3; \
  2727. x3 ^= ~x1 & x2; \
  2728. x1 ^= x0 & x2; \
  2729. x2 ^= x0 & ~x3; \
  2730. x0 ^= x1 | x3; \
  2731. x3 ^= x1 & x2; \
  2732. x1 ^= tmp & x0; \
  2733. x2 ^= tmp; \
  2734. } while (0)
  2735.  
  2736. #define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
  2737. x4 ^= x1; \
  2738. x5 ^= x2; \
  2739. x6 ^= x3 ^ x0; \
  2740. x7 ^= x0; \
  2741. x0 ^= x5; \
  2742. x1 ^= x6; \
  2743. x2 ^= x7 ^ x4; \
  2744. x3 ^= x4; \
  2745. } while (0)
  2746.  
  2747. __constant const sph_u64 C[] = {
  2748. C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
  2749. C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
  2750. C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
  2751. C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
  2752. C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
  2753. C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
  2754. C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
  2755. C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
  2756. C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
  2757. C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
  2758. C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
  2759. C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
  2760. C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
  2761. C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
  2762. C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
  2763. C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
  2764. C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
  2765. C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
  2766. C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
  2767. C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
  2768. C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
  2769. C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
  2770. C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
  2771. C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
  2772. C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
  2773. C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
  2774. C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
  2775. C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
  2776. C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
  2777. C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
  2778. C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
  2779. C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
  2780. C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
  2781. C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
  2782. C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
  2783. C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
  2784. C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
  2785. C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
  2786. C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
  2787. C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
  2788. C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
  2789. C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
  2790. C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
  2791. C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
  2792. C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
  2793. C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
  2794. C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
  2795. C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
  2796. C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
  2797. C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
  2798. C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
  2799. C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
  2800. C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
  2801. C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
  2802. C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
  2803. C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
  2804. C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
  2805. C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
  2806. C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
  2807. C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
  2808. C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
  2809. C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
  2810. C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
  2811. C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
  2812. C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
  2813. C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
  2814. C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
  2815. C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
  2816. C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
  2817. C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
  2818. C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
  2819. C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
  2820. C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
  2821. C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
  2822. C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
  2823. C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
  2824. C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
  2825. C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
  2826. C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
  2827. C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
  2828. C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
  2829. C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
  2830. C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
  2831. C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
  2832. };
  2833.  
  2834. #define Ceven_hi(r) (C[((r) << 2) + 0])
  2835. #define Ceven_lo(r) (C[((r) << 2) + 1])
  2836. #define Codd_hi(r) (C[((r) << 2) + 2])
  2837. #define Codd_lo(r) (C[((r) << 2) + 3])
  2838.  
  2839. #define S(x0, x1, x2, x3, cb, r) do { \
  2840. Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
  2841. Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
  2842. } while (0)
  2843.  
  2844. #define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
  2845. Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
  2846. x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
  2847. Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
  2848. x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
  2849. } while (0)
  2850.  
  2851. #define Wz(x, c, n) do { \
  2852. sph_u64 t = (x ## h & (c)) << (n); \
  2853. x ## h = ((x ## h >> (n)) & (c)) | t; \
  2854. t = (x ## l & (c)) << (n); \
  2855. x ## l = ((x ## l >> (n)) & (c)) | t; \
  2856. } while (0)
  2857.  
  2858. #define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1)
  2859. #define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2)
  2860. #define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4)
  2861. #define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8)
  2862. #define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
  2863. #define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
  2864. #define W6(x) do { \
  2865. sph_u64 t = x ## h; \
  2866. x ## h = x ## l; \
  2867. x ## l = t; \
  2868. } while (0)
  2869.  
  2870. __constant const sph_u64 JH_IV512[] = {
  2871. C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
  2872. C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
  2873. C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
  2874. C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
  2875. C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
  2876. C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
  2877. C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
  2878. C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
  2879. };
  2880.  
  2881. #define SL(ro) SLu(r + ro, ro)
  2882.  
  2883. #define SLu(r, ro) do { \
  2884. S(h0, h2, h4, h6, Ceven_, r); \
  2885. S(h1, h3, h5, h7, Codd_, r); \
  2886. L(h0, h2, h4, h6, h1, h3, h5, h7); \
  2887. W ## ro(h1); \
  2888. W ## ro(h3); \
  2889. W ## ro(h5); \
  2890. W ## ro(h7); \
  2891. } while (0)
  2892.  
  2893. #if SPH_SMALL_FOOTPRINT_JH
  2894.  
  2895. /*
  2896. * The "small footprint" 64-bit version just uses a partially unrolled
  2897. * loop.
  2898. */
  2899.  
  2900. #define E8 do { \
  2901. unsigned r; \
  2902. for (r = 0; r < 42; r += 7) { \
  2903. SL(0); \
  2904. SL(1); \
  2905. SL(2); \
  2906. SL(3); \
  2907. SL(4); \
  2908. SL(5); \
  2909. SL(6); \
  2910. } \
  2911. } while (0)
  2912.  
  2913. #else
  2914.  
  2915. /*
  2916. * On a "true 64-bit" architecture, we can unroll at will.
  2917. */
  2918.  
  2919. #define E8 do { \
  2920. SLu( 0, 0); \
  2921. SLu( 1, 1); \
  2922. SLu( 2, 2); \
  2923. SLu( 3, 3); \
  2924. SLu( 4, 4); \
  2925. SLu( 5, 5); \
  2926. SLu( 6, 6); \
  2927. SLu( 7, 0); \
  2928. SLu( 8, 1); \
  2929. SLu( 9, 2); \
  2930. SLu(10, 3); \
  2931. SLu(11, 4); \
  2932. SLu(12, 5); \
  2933. SLu(13, 6); \
  2934. SLu(14, 0); \
  2935. SLu(15, 1); \
  2936. SLu(16, 2); \
  2937. SLu(17, 3); \
  2938. SLu(18, 4); \
  2939. SLu(19, 5); \
  2940. SLu(20, 6); \
  2941. SLu(21, 0); \
  2942. SLu(22, 1); \
  2943. SLu(23, 2); \
  2944. SLu(24, 3); \
  2945. SLu(25, 4); \
  2946. SLu(26, 5); \
  2947. SLu(27, 6); \
  2948. SLu(28, 0); \
  2949. SLu(29, 1); \
  2950. SLu(30, 2); \
  2951. SLu(31, 3); \
  2952. SLu(32, 4); \
  2953. SLu(33, 5); \
  2954. SLu(34, 6); \
  2955. SLu(35, 0); \
  2956. SLu(36, 1); \
  2957. SLu(37, 2); \
  2958. SLu(38, 3); \
  2959. SLu(39, 4); \
  2960. SLu(40, 5); \
  2961. SLu(41, 6); \
  2962. } while (0)
  2963.  
  2964. #endif
  2965.  
  2966. /* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */
  2967. /*
  2968. * Keccak implementation.
  2969. *
  2970. * ==========================(LICENSE BEGIN)============================
  2971. *
  2972. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  2973. *
  2974. * Permission is hereby granted, free of charge, to any person obtaining
  2975. * a copy of this software and associated documentation files (the
  2976. * "Software"), to deal in the Software without restriction, including
  2977. * without limitation the rights to use, copy, modify, merge, publish,
  2978. * distribute, sublicense, and/or sell copies of the Software, and to
  2979. * permit persons to whom the Software is furnished to do so, subject to
  2980. * the following conditions:
  2981. *
  2982. * The above copyright notice and this permission notice shall be
  2983. * included in all copies or substantial portions of the Software.
  2984. *
  2985. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  2986. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  2987. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  2988. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  2989. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  2990. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  2991. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  2992. *
  2993. * ===========================(LICENSE END)=============================
  2994. *
  2995. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  2996. */
  2997.  
  2998. #ifdef __cplusplus
  2999. extern "C"{
  3000. #endif
  3001.  
  3002. /*
  3003. * Parameters:
  3004. *
  3005. * SPH_KECCAK_64 use a 64-bit type
  3006. * SPH_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll)
  3007. * SPH_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only)
  3008. * SPH_KECCAK_NOCOPY do not copy the state into local variables
  3009. *
  3010. * If there is no usable 64-bit type, the code automatically switches
  3011. * back to the 32-bit implementation.
  3012. *
  3013. * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
  3014. * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
  3015. * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
  3016. * 8 kB L1 code cache), seem to show that the following are optimal:
  3017. *
  3018. * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
  3019. * do not copy the state; unrolling 2, 6 or all rounds also provides
  3020. * near-optimal performance.
  3021. * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
  3022. * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
  3023. * also provides near-optimal performance.
  3024. * -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
  3025. * copy the state. Unrolling 4 or 6 rounds is near-optimal.
  3026. * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
  3027. * copy the state.
  3028. * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
  3029. * the state. Unrolling only 1 round is also near-optimal.
  3030. *
  3031. * Also, interleaving does not always yield actual improvements when
  3032. * using a 32-bit implementation; in particular when the architecture
  3033. * does not offer a native rotation opcode (interleaving replaces one
  3034. * 64-bit rotation with two 32-bit rotations, which is a gain only if
  3035. * there is a native 32-bit rotation opcode and not a native 64-bit
  3036. * rotation opcode; also, interleaving implies a small overhead when
  3037. * processing input words).
  3038. *
  3039. * To sum up:
  3040. * -- when possible, use the 64-bit code
  3041. * -- exception: on 32-bit x86, use 32-bit code
  3042. * -- when using 32-bit code, use interleaving
  3043. * -- copy the state, except on x86
3044. * -- unroll 8 rounds on "big" machines, 2 rounds on "small" machines
  3045. */
  3046.  
  3047. /*
  3048. * Unroll 8 rounds on big systems, 2 rounds on small systems.
  3049. */
  3050. #ifndef SPH_KECCAK_UNROLL
  3051. #if SPH_SMALL_FOOTPRINT_KECCAK
  3052. #define SPH_KECCAK_UNROLL 2
  3053. #else
  3054. #define SPH_KECCAK_UNROLL 8
  3055. #endif
  3056. #endif
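/*
 * Hedged sketch (guarded out): pinning the unroll factor explicitly.
 * Defining SPH_KECCAK_UNROLL before the selection block above -- or
 * passing it through the OpenCL build options, e.g.
 * "-DSPH_KECCAK_UNROLL=2" to clBuildProgram -- overrides the default.
 * Accepted values are those handled by the #if chain further down:
 * 0, 1, 2, 4, 6, 8 and 12, where 0 requests the fully unrolled
 * permutation.  The exact build invocation depends on the host miner
 * and is an assumption, not something this file prescribes.
 */
#if 0
#define SPH_KECCAK_UNROLL 2
#endif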
  3057.  
  3058. __constant const sph_u64 RC[] = {
  3059. SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
  3060. SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
  3061. SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
  3062. SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
  3063. SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
  3064. SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
  3065. SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
  3066. SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
  3067. SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
  3068. SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
  3069. SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
  3070. SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
  3071. };
  3072.  
  3073. #define DECL64(x) sph_u64 x
  3074. #define MOV64(d, s) (d = s)
  3075. #define XOR64(d, a, b) (d = a ^ b)
  3076. #define AND64(d, a, b) (d = a & b)
  3077. #define OR64(d, a, b) (d = a | b)
  3078. #define NOT64(d, s) (d = SPH_T64(~s))
  3079. #define ROL64(d, v, n) (d = SPH_ROTL64(v, n))
  3080. #define XOR64_IOTA XOR64
  3081.  
  3082. #define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
  3083. DECL64(tt0); \
  3084. DECL64(tt1); \
  3085. DECL64(tt2); \
  3086. DECL64(tt3); \
  3087. XOR64(tt0, d0, d1); \
  3088. XOR64(tt1, d2, d3); \
  3089. XOR64(tt0, tt0, d4); \
  3090. XOR64(tt0, tt0, tt1); \
  3091. ROL64(tt0, tt0, 1); \
  3092. XOR64(tt2, c0, c1); \
  3093. XOR64(tt3, c2, c3); \
  3094. XOR64(tt0, tt0, c4); \
  3095. XOR64(tt2, tt2, tt3); \
  3096. XOR64(t, tt0, tt2); \
  3097. } while (0)
  3098.  
  3099. #define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
  3100. b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
  3101. b40, b41, b42, b43, b44) \
  3102. do { \
  3103. DECL64(t0); \
  3104. DECL64(t1); \
  3105. DECL64(t2); \
  3106. DECL64(t3); \
  3107. DECL64(t4); \
  3108. TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
  3109. TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
  3110. TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
  3111. TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
  3112. TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
  3113. XOR64(b00, b00, t0); \
  3114. XOR64(b01, b01, t0); \
  3115. XOR64(b02, b02, t0); \
  3116. XOR64(b03, b03, t0); \
  3117. XOR64(b04, b04, t0); \
  3118. XOR64(b10, b10, t1); \
  3119. XOR64(b11, b11, t1); \
  3120. XOR64(b12, b12, t1); \
  3121. XOR64(b13, b13, t1); \
  3122. XOR64(b14, b14, t1); \
  3123. XOR64(b20, b20, t2); \
  3124. XOR64(b21, b21, t2); \
  3125. XOR64(b22, b22, t2); \
  3126. XOR64(b23, b23, t2); \
  3127. XOR64(b24, b24, t2); \
  3128. XOR64(b30, b30, t3); \
  3129. XOR64(b31, b31, t3); \
  3130. XOR64(b32, b32, t3); \
  3131. XOR64(b33, b33, t3); \
  3132. XOR64(b34, b34, t3); \
  3133. XOR64(b40, b40, t4); \
  3134. XOR64(b41, b41, t4); \
  3135. XOR64(b42, b42, t4); \
  3136. XOR64(b43, b43, t4); \
  3137. XOR64(b44, b44, t4); \
  3138. } while (0)
  3139.  
  3140. #define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
  3141. b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
  3142. b40, b41, b42, b43, b44) \
  3143. do { \
  3144. /* ROL64(b00, b00, 0); */ \
  3145. ROL64(b01, b01, 36); \
  3146. ROL64(b02, b02, 3); \
  3147. ROL64(b03, b03, 41); \
  3148. ROL64(b04, b04, 18); \
  3149. ROL64(b10, b10, 1); \
  3150. ROL64(b11, b11, 44); \
  3151. ROL64(b12, b12, 10); \
  3152. ROL64(b13, b13, 45); \
  3153. ROL64(b14, b14, 2); \
  3154. ROL64(b20, b20, 62); \
  3155. ROL64(b21, b21, 6); \
  3156. ROL64(b22, b22, 43); \
  3157. ROL64(b23, b23, 15); \
  3158. ROL64(b24, b24, 61); \
  3159. ROL64(b30, b30, 28); \
  3160. ROL64(b31, b31, 55); \
  3161. ROL64(b32, b32, 25); \
  3162. ROL64(b33, b33, 21); \
  3163. ROL64(b34, b34, 56); \
  3164. ROL64(b40, b40, 27); \
  3165. ROL64(b41, b41, 20); \
  3166. ROL64(b42, b42, 39); \
  3167. ROL64(b43, b43, 8); \
  3168. ROL64(b44, b44, 14); \
  3169. } while (0)
  3170.  
  3171. /*
  3172. * The KHI macro integrates the "lane complement" optimization. On input,
  3173. * some words are complemented:
  3174. * a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
  3175. * On output, the following words are complemented:
  3176. * a04 a10 a20 a22 a23 a31
  3177. *
  3178. * The (implicit) permutation and the theta expansion will bring back
  3179. * the input mask for the next round.
  3180. */
  3181.  
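/*
 * Hedged sketch (guarded out, not part of the kernel): the lane-complement
 * trick is sound because the chi step  a ^ (~b & c)  can be rewritten with
 * De Morgan's laws when one operand arrives complemented:
 *     a ^ (~b & c) == a ^ ~(b | ~c) == ~(a ^ (b | ~c))
 * so a NOT on an input can be traded for an AND/OR swap plus a complemented
 * output, which is what the KHI macro below does with its bnn temporaries.
 * The helper is only a correctness check over 64-bit words.
 */
#if 0
static int khi_complement_check(sph_u64 a, sph_u64 b, sph_u64 c)
{
	sph_u64 plain  = a ^ (~b & c);
	sph_u64 folded = ~(a ^ (b | ~c));
	return plain == folded; /* holds for all a, b, c */
}
#endif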
  3182. #define KHI_XO(d, a, b, c) do { \
  3183. DECL64(kt); \
  3184. OR64(kt, b, c); \
  3185. XOR64(d, a, kt); \
  3186. } while (0)
  3187.  
  3188. #define KHI_XA(d, a, b, c) do { \
  3189. DECL64(kt); \
  3190. AND64(kt, b, c); \
  3191. XOR64(d, a, kt); \
  3192. } while (0)
  3193.  
  3194. #define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
  3195. b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
  3196. b40, b41, b42, b43, b44) \
  3197. do { \
  3198. DECL64(c0); \
  3199. DECL64(c1); \
  3200. DECL64(c2); \
  3201. DECL64(c3); \
  3202. DECL64(c4); \
  3203. DECL64(bnn); \
  3204. NOT64(bnn, b20); \
  3205. KHI_XO(c0, b00, b10, b20); \
  3206. KHI_XO(c1, b10, bnn, b30); \
  3207. KHI_XA(c2, b20, b30, b40); \
  3208. KHI_XO(c3, b30, b40, b00); \
  3209. KHI_XA(c4, b40, b00, b10); \
  3210. MOV64(b00, c0); \
  3211. MOV64(b10, c1); \
  3212. MOV64(b20, c2); \
  3213. MOV64(b30, c3); \
  3214. MOV64(b40, c4); \
  3215. NOT64(bnn, b41); \
  3216. KHI_XO(c0, b01, b11, b21); \
  3217. KHI_XA(c1, b11, b21, b31); \
  3218. KHI_XO(c2, b21, b31, bnn); \
  3219. KHI_XO(c3, b31, b41, b01); \
  3220. KHI_XA(c4, b41, b01, b11); \
  3221. MOV64(b01, c0); \
  3222. MOV64(b11, c1); \
  3223. MOV64(b21, c2); \
  3224. MOV64(b31, c3); \
  3225. MOV64(b41, c4); \
  3226. NOT64(bnn, b32); \
  3227. KHI_XO(c0, b02, b12, b22); \
  3228. KHI_XA(c1, b12, b22, b32); \
  3229. KHI_XA(c2, b22, bnn, b42); \
  3230. KHI_XO(c3, bnn, b42, b02); \
  3231. KHI_XA(c4, b42, b02, b12); \
  3232. MOV64(b02, c0); \
  3233. MOV64(b12, c1); \
  3234. MOV64(b22, c2); \
  3235. MOV64(b32, c3); \
  3236. MOV64(b42, c4); \
  3237. NOT64(bnn, b33); \
  3238. KHI_XA(c0, b03, b13, b23); \
  3239. KHI_XO(c1, b13, b23, b33); \
  3240. KHI_XO(c2, b23, bnn, b43); \
  3241. KHI_XA(c3, bnn, b43, b03); \
  3242. KHI_XO(c4, b43, b03, b13); \
  3243. MOV64(b03, c0); \
  3244. MOV64(b13, c1); \
  3245. MOV64(b23, c2); \
  3246. MOV64(b33, c3); \
  3247. MOV64(b43, c4); \
  3248. NOT64(bnn, b14); \
  3249. KHI_XA(c0, b04, bnn, b24); \
  3250. KHI_XO(c1, bnn, b24, b34); \
  3251. KHI_XA(c2, b24, b34, b44); \
  3252. KHI_XO(c3, b34, b44, b04); \
  3253. KHI_XA(c4, b44, b04, b14); \
  3254. MOV64(b04, c0); \
  3255. MOV64(b14, c1); \
  3256. MOV64(b24, c2); \
  3257. MOV64(b34, c3); \
  3258. MOV64(b44, c4); \
  3259. } while (0)
  3260.  
  3261. #define IOTA(r) XOR64_IOTA(a00, a00, r)
  3262.  
  3263. #define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
  3264. a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
  3265. #define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
  3266. a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
  3267. #define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
  3268. a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
  3269. #define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
  3270. a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
  3271. #define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
  3272. a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
  3273. #define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
  3274. a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
  3275. #define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
  3276. a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
  3277. #define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
  3278. a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
  3279. #define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
  3280. a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
  3281. #define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
  3282. a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
  3283. #define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
  3284. a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
  3285. #define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
  3286. a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
  3287. #define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
  3288. a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
  3289. #define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
  3290. a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
  3291. #define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
  3292. a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
  3293. #define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
  3294. a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
  3295. #define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
  3296. a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
  3297. #define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
  3298. a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
  3299. #define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
  3300. a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
  3301. #define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
  3302. a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
  3303. #define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
  3304. a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
  3305. #define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
  3306. a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
  3307. #define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
  3308. a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
  3309. #define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
  3310. a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
  3311.  
  3312. #define P1_TO_P0 do { \
  3313. DECL64(t); \
  3314. MOV64(t, a01); \
  3315. MOV64(a01, a30); \
  3316. MOV64(a30, a33); \
  3317. MOV64(a33, a23); \
  3318. MOV64(a23, a12); \
  3319. MOV64(a12, a21); \
  3320. MOV64(a21, a02); \
  3321. MOV64(a02, a10); \
  3322. MOV64(a10, a11); \
  3323. MOV64(a11, a41); \
  3324. MOV64(a41, a24); \
  3325. MOV64(a24, a42); \
  3326. MOV64(a42, a04); \
  3327. MOV64(a04, a20); \
  3328. MOV64(a20, a22); \
  3329. MOV64(a22, a32); \
  3330. MOV64(a32, a43); \
  3331. MOV64(a43, a34); \
  3332. MOV64(a34, a03); \
  3333. MOV64(a03, a40); \
  3334. MOV64(a40, a44); \
  3335. MOV64(a44, a14); \
  3336. MOV64(a14, a31); \
  3337. MOV64(a31, a13); \
  3338. MOV64(a13, t); \
  3339. } while (0)
  3340.  
  3341. #define P2_TO_P0 do { \
  3342. DECL64(t); \
  3343. MOV64(t, a01); \
  3344. MOV64(a01, a33); \
  3345. MOV64(a33, a12); \
  3346. MOV64(a12, a02); \
  3347. MOV64(a02, a11); \
  3348. MOV64(a11, a24); \
  3349. MOV64(a24, a04); \
  3350. MOV64(a04, a22); \
  3351. MOV64(a22, a43); \
  3352. MOV64(a43, a03); \
  3353. MOV64(a03, a44); \
  3354. MOV64(a44, a31); \
  3355. MOV64(a31, t); \
  3356. MOV64(t, a10); \
  3357. MOV64(a10, a41); \
  3358. MOV64(a41, a42); \
  3359. MOV64(a42, a20); \
  3360. MOV64(a20, a32); \
  3361. MOV64(a32, a34); \
  3362. MOV64(a34, a40); \
  3363. MOV64(a40, a14); \
  3364. MOV64(a14, a13); \
  3365. MOV64(a13, a30); \
  3366. MOV64(a30, a23); \
  3367. MOV64(a23, a21); \
  3368. MOV64(a21, t); \
  3369. } while (0)
  3370.  
  3371. #define P4_TO_P0 do { \
  3372. DECL64(t); \
  3373. MOV64(t, a01); \
  3374. MOV64(a01, a12); \
  3375. MOV64(a12, a11); \
  3376. MOV64(a11, a04); \
  3377. MOV64(a04, a43); \
  3378. MOV64(a43, a44); \
  3379. MOV64(a44, t); \
  3380. MOV64(t, a02); \
  3381. MOV64(a02, a24); \
  3382. MOV64(a24, a22); \
  3383. MOV64(a22, a03); \
  3384. MOV64(a03, a31); \
  3385. MOV64(a31, a33); \
  3386. MOV64(a33, t); \
  3387. MOV64(t, a10); \
  3388. MOV64(a10, a42); \
  3389. MOV64(a42, a32); \
  3390. MOV64(a32, a40); \
  3391. MOV64(a40, a13); \
  3392. MOV64(a13, a23); \
  3393. MOV64(a23, t); \
  3394. MOV64(t, a14); \
  3395. MOV64(a14, a30); \
  3396. MOV64(a30, a21); \
  3397. MOV64(a21, a41); \
  3398. MOV64(a41, a20); \
  3399. MOV64(a20, a34); \
  3400. MOV64(a34, t); \
  3401. } while (0)
  3402.  
  3403. #define P6_TO_P0 do { \
  3404. DECL64(t); \
  3405. MOV64(t, a01); \
  3406. MOV64(a01, a02); \
  3407. MOV64(a02, a04); \
  3408. MOV64(a04, a03); \
  3409. MOV64(a03, t); \
  3410. MOV64(t, a10); \
  3411. MOV64(a10, a20); \
  3412. MOV64(a20, a40); \
  3413. MOV64(a40, a30); \
  3414. MOV64(a30, t); \
  3415. MOV64(t, a11); \
  3416. MOV64(a11, a22); \
  3417. MOV64(a22, a44); \
  3418. MOV64(a44, a33); \
  3419. MOV64(a33, t); \
  3420. MOV64(t, a12); \
  3421. MOV64(a12, a24); \
  3422. MOV64(a24, a43); \
  3423. MOV64(a43, a31); \
  3424. MOV64(a31, t); \
  3425. MOV64(t, a13); \
  3426. MOV64(a13, a21); \
  3427. MOV64(a21, a42); \
  3428. MOV64(a42, a34); \
  3429. MOV64(a34, t); \
  3430. MOV64(t, a14); \
  3431. MOV64(a14, a23); \
  3432. MOV64(a23, a41); \
  3433. MOV64(a41, a32); \
  3434. MOV64(a32, t); \
  3435. } while (0)
  3436.  
  3437. #define P8_TO_P0 do { \
  3438. DECL64(t); \
  3439. MOV64(t, a01); \
  3440. MOV64(a01, a11); \
  3441. MOV64(a11, a43); \
  3442. MOV64(a43, t); \
  3443. MOV64(t, a02); \
  3444. MOV64(a02, a22); \
  3445. MOV64(a22, a31); \
  3446. MOV64(a31, t); \
  3447. MOV64(t, a03); \
  3448. MOV64(a03, a33); \
  3449. MOV64(a33, a24); \
  3450. MOV64(a24, t); \
  3451. MOV64(t, a04); \
  3452. MOV64(a04, a44); \
  3453. MOV64(a44, a12); \
  3454. MOV64(a12, t); \
  3455. MOV64(t, a10); \
  3456. MOV64(a10, a32); \
  3457. MOV64(a32, a13); \
  3458. MOV64(a13, t); \
  3459. MOV64(t, a14); \
  3460. MOV64(a14, a21); \
  3461. MOV64(a21, a20); \
  3462. MOV64(a20, t); \
  3463. MOV64(t, a23); \
  3464. MOV64(a23, a42); \
  3465. MOV64(a42, a40); \
  3466. MOV64(a40, t); \
  3467. MOV64(t, a30); \
  3468. MOV64(a30, a41); \
  3469. MOV64(a41, a34); \
  3470. MOV64(a34, t); \
  3471. } while (0)
  3472.  
  3473. #define P12_TO_P0 do { \
  3474. DECL64(t); \
  3475. MOV64(t, a01); \
  3476. MOV64(a01, a04); \
  3477. MOV64(a04, t); \
  3478. MOV64(t, a02); \
  3479. MOV64(a02, a03); \
  3480. MOV64(a03, t); \
  3481. MOV64(t, a10); \
  3482. MOV64(a10, a40); \
  3483. MOV64(a40, t); \
  3484. MOV64(t, a11); \
  3485. MOV64(a11, a44); \
  3486. MOV64(a44, t); \
  3487. MOV64(t, a12); \
  3488. MOV64(a12, a43); \
  3489. MOV64(a43, t); \
  3490. MOV64(t, a13); \
  3491. MOV64(a13, a42); \
  3492. MOV64(a42, t); \
  3493. MOV64(t, a14); \
  3494. MOV64(a14, a41); \
  3495. MOV64(a41, t); \
  3496. MOV64(t, a20); \
  3497. MOV64(a20, a30); \
  3498. MOV64(a30, t); \
  3499. MOV64(t, a21); \
  3500. MOV64(a21, a34); \
  3501. MOV64(a34, t); \
  3502. MOV64(t, a22); \
  3503. MOV64(a22, a33); \
  3504. MOV64(a33, t); \
  3505. MOV64(t, a23); \
  3506. MOV64(a23, a32); \
  3507. MOV64(a32, t); \
  3508. MOV64(t, a24); \
  3509. MOV64(a24, a31); \
  3510. MOV64(a31, t); \
  3511. } while (0)
  3512.  
  3513. #define LPAR (
  3514. #define RPAR )
  3515.  
  3516. #define KF_ELT(r, s, k) do { \
  3517. THETA LPAR P ## r RPAR; \
  3518. RHO LPAR P ## r RPAR; \
  3519. KHI LPAR P ## s RPAR; \
  3520. IOTA(k); \
  3521. } while (0)
  3522.  
  3523. #define DO(x) x
  3524.  
  3525. #define KECCAK_F_1600 DO(KECCAK_F_1600_)
  3526.  
  3527. #if SPH_KECCAK_UNROLL == 1
  3528.  
  3529. #define KECCAK_F_1600_ do { \
  3530. int j; \
  3531. for (j = 0; j < 24; j ++) { \
  3532. KF_ELT( 0, 1, RC[j + 0]); \
  3533. P1_TO_P0; \
  3534. } \
  3535. } while (0)
  3536.  
  3537. #elif SPH_KECCAK_UNROLL == 2
  3538.  
  3539. #define KECCAK_F_1600_ do { \
  3540. int j; \
  3541. for (j = 0; j < 24; j += 2) { \
  3542. KF_ELT( 0, 1, RC[j + 0]); \
  3543. KF_ELT( 1, 2, RC[j + 1]); \
  3544. P2_TO_P0; \
  3545. } \
  3546. } while (0)
  3547.  
  3548. #elif SPH_KECCAK_UNROLL == 4
  3549.  
  3550. #define KECCAK_F_1600_ do { \
  3551. int j; \
  3552. for (j = 0; j < 24; j += 4) { \
  3553. KF_ELT( 0, 1, RC[j + 0]); \
  3554. KF_ELT( 1, 2, RC[j + 1]); \
  3555. KF_ELT( 2, 3, RC[j + 2]); \
  3556. KF_ELT( 3, 4, RC[j + 3]); \
  3557. P4_TO_P0; \
  3558. } \
  3559. } while (0)
  3560.  
  3561. #elif SPH_KECCAK_UNROLL == 6
  3562.  
  3563. #define KECCAK_F_1600_ do { \
  3564. int j; \
  3565. for (j = 0; j < 24; j += 6) { \
  3566. KF_ELT( 0, 1, RC[j + 0]); \
  3567. KF_ELT( 1, 2, RC[j + 1]); \
  3568. KF_ELT( 2, 3, RC[j + 2]); \
  3569. KF_ELT( 3, 4, RC[j + 3]); \
  3570. KF_ELT( 4, 5, RC[j + 4]); \
  3571. KF_ELT( 5, 6, RC[j + 5]); \
  3572. P6_TO_P0; \
  3573. } \
  3574. } while (0)
  3575.  
  3576. #elif SPH_KECCAK_UNROLL == 8
  3577.  
  3578. #define KECCAK_F_1600_ do { \
  3579. int j; \
  3580. for (j = 0; j < 24; j += 8) { \
  3581. KF_ELT( 0, 1, RC[j + 0]); \
  3582. KF_ELT( 1, 2, RC[j + 1]); \
  3583. KF_ELT( 2, 3, RC[j + 2]); \
  3584. KF_ELT( 3, 4, RC[j + 3]); \
  3585. KF_ELT( 4, 5, RC[j + 4]); \
  3586. KF_ELT( 5, 6, RC[j + 5]); \
  3587. KF_ELT( 6, 7, RC[j + 6]); \
  3588. KF_ELT( 7, 8, RC[j + 7]); \
  3589. P8_TO_P0; \
  3590. } \
  3591. } while (0)
  3592.  
  3593. #elif SPH_KECCAK_UNROLL == 12
  3594.  
  3595. #define KECCAK_F_1600_ do { \
  3596. int j; \
  3597. for (j = 0; j < 24; j += 12) { \
  3598. KF_ELT( 0, 1, RC[j + 0]); \
  3599. KF_ELT( 1, 2, RC[j + 1]); \
  3600. KF_ELT( 2, 3, RC[j + 2]); \
  3601. KF_ELT( 3, 4, RC[j + 3]); \
  3602. KF_ELT( 4, 5, RC[j + 4]); \
  3603. KF_ELT( 5, 6, RC[j + 5]); \
  3604. KF_ELT( 6, 7, RC[j + 6]); \
  3605. KF_ELT( 7, 8, RC[j + 7]); \
  3606. KF_ELT( 8, 9, RC[j + 8]); \
  3607. KF_ELT( 9, 10, RC[j + 9]); \
  3608. KF_ELT(10, 11, RC[j + 10]); \
  3609. KF_ELT(11, 12, RC[j + 11]); \
  3610. P12_TO_P0; \
  3611. } \
  3612. } while (0)
  3613.  
  3614. #elif SPH_KECCAK_UNROLL == 0
  3615.  
  3616. #define KECCAK_F_1600_ do { \
  3617. KF_ELT( 0, 1, RC[ 0]); \
  3618. KF_ELT( 1, 2, RC[ 1]); \
  3619. KF_ELT( 2, 3, RC[ 2]); \
  3620. KF_ELT( 3, 4, RC[ 3]); \
  3621. KF_ELT( 4, 5, RC[ 4]); \
  3622. KF_ELT( 5, 6, RC[ 5]); \
  3623. KF_ELT( 6, 7, RC[ 6]); \
  3624. KF_ELT( 7, 8, RC[ 7]); \
  3625. KF_ELT( 8, 9, RC[ 8]); \
  3626. KF_ELT( 9, 10, RC[ 9]); \
  3627. KF_ELT(10, 11, RC[10]); \
  3628. KF_ELT(11, 12, RC[11]); \
  3629. KF_ELT(12, 13, RC[12]); \
  3630. KF_ELT(13, 14, RC[13]); \
  3631. KF_ELT(14, 15, RC[14]); \
  3632. KF_ELT(15, 16, RC[15]); \
  3633. KF_ELT(16, 17, RC[16]); \
  3634. KF_ELT(17, 18, RC[17]); \
  3635. KF_ELT(18, 19, RC[18]); \
  3636. KF_ELT(19, 20, RC[19]); \
  3637. KF_ELT(20, 21, RC[20]); \
  3638. KF_ELT(21, 22, RC[21]); \
  3639. KF_ELT(22, 23, RC[22]); \
  3640. KF_ELT(23, 0, RC[23]); \
  3641. } while (0)
  3642.  
  3643. #else
  3644.  
  3645. #error Unimplemented unroll count for Keccak.
  3646.  
  3647. #endif
  3648.  
  3649. /* $Id: skein.c 254 2011-06-07 19:38:58Z tp $ */
  3650. /*
  3651. * Skein implementation.
  3652. *
  3653. * ==========================(LICENSE BEGIN)============================
  3654. *
  3655. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  3656. *
  3657. * Permission is hereby granted, free of charge, to any person obtaining
  3658. * a copy of this software and associated documentation files (the
  3659. * "Software"), to deal in the Software without restriction, including
  3660. * without limitation the rights to use, copy, modify, merge, publish,
  3661. * distribute, sublicense, and/or sell copies of the Software, and to
  3662. * permit persons to whom the Software is furnished to do so, subject to
  3663. * the following conditions:
  3664. *
  3665. * The above copyright notice and this permission notice shall be
  3666. * included in all copies or substantial portions of the Software.
  3667. *
  3668. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  3669. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  3670. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  3671. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  3672. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  3673. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  3674. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  3675. *
  3676. * ===========================(LICENSE END)=============================
  3677. *
  3678. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  3679. */
  3680.  
  3681. /*
  3682. * M9_ ## s ## _ ## i evaluates to s+i mod 9 (0 <= s <= 18, 0 <= i <= 7).
  3683. */
  3684.  
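/* For example, M9_12_5 below is 8, since (12 + 5) mod 9 = 8. */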
  3685. #define M9_0_0 0
  3686. #define M9_0_1 1
  3687. #define M9_0_2 2
  3688. #define M9_0_3 3
  3689. #define M9_0_4 4
  3690. #define M9_0_5 5
  3691. #define M9_0_6 6
  3692. #define M9_0_7 7
  3693.  
  3694. #define M9_1_0 1
  3695. #define M9_1_1 2
  3696. #define M9_1_2 3
  3697. #define M9_1_3 4
  3698. #define M9_1_4 5
  3699. #define M9_1_5 6
  3700. #define M9_1_6 7
  3701. #define M9_1_7 8
  3702.  
  3703. #define M9_2_0 2
  3704. #define M9_2_1 3
  3705. #define M9_2_2 4
  3706. #define M9_2_3 5
  3707. #define M9_2_4 6
  3708. #define M9_2_5 7
  3709. #define M9_2_6 8
  3710. #define M9_2_7 0
  3711.  
  3712. #define M9_3_0 3
  3713. #define M9_3_1 4
  3714. #define M9_3_2 5
  3715. #define M9_3_3 6
  3716. #define M9_3_4 7
  3717. #define M9_3_5 8
  3718. #define M9_3_6 0
  3719. #define M9_3_7 1
  3720.  
  3721. #define M9_4_0 4
  3722. #define M9_4_1 5
  3723. #define M9_4_2 6
  3724. #define M9_4_3 7
  3725. #define M9_4_4 8
  3726. #define M9_4_5 0
  3727. #define M9_4_6 1
  3728. #define M9_4_7 2
  3729.  
  3730. #define M9_5_0 5
  3731. #define M9_5_1 6
  3732. #define M9_5_2 7
  3733. #define M9_5_3 8
  3734. #define M9_5_4 0
  3735. #define M9_5_5 1
  3736. #define M9_5_6 2
  3737. #define M9_5_7 3
  3738.  
  3739. #define M9_6_0 6
  3740. #define M9_6_1 7
  3741. #define M9_6_2 8
  3742. #define M9_6_3 0
  3743. #define M9_6_4 1
  3744. #define M9_6_5 2
  3745. #define M9_6_6 3
  3746. #define M9_6_7 4
  3747.  
  3748. #define M9_7_0 7
  3749. #define M9_7_1 8
  3750. #define M9_7_2 0
  3751. #define M9_7_3 1
  3752. #define M9_7_4 2
  3753. #define M9_7_5 3
  3754. #define M9_7_6 4
  3755. #define M9_7_7 5
  3756.  
  3757. #define M9_8_0 8
  3758. #define M9_8_1 0
  3759. #define M9_8_2 1
  3760. #define M9_8_3 2
  3761. #define M9_8_4 3
  3762. #define M9_8_5 4
  3763. #define M9_8_6 5
  3764. #define M9_8_7 6
  3765.  
  3766. #define M9_9_0 0
  3767. #define M9_9_1 1
  3768. #define M9_9_2 2
  3769. #define M9_9_3 3
  3770. #define M9_9_4 4
  3771. #define M9_9_5 5
  3772. #define M9_9_6 6
  3773. #define M9_9_7 7
  3774.  
  3775. #define M9_10_0 1
  3776. #define M9_10_1 2
  3777. #define M9_10_2 3
  3778. #define M9_10_3 4
  3779. #define M9_10_4 5
  3780. #define M9_10_5 6
  3781. #define M9_10_6 7
  3782. #define M9_10_7 8
  3783.  
  3784. #define M9_11_0 2
  3785. #define M9_11_1 3
  3786. #define M9_11_2 4
  3787. #define M9_11_3 5
  3788. #define M9_11_4 6
  3789. #define M9_11_5 7
  3790. #define M9_11_6 8
  3791. #define M9_11_7 0
  3792.  
  3793. #define M9_12_0 3
  3794. #define M9_12_1 4
  3795. #define M9_12_2 5
  3796. #define M9_12_3 6
  3797. #define M9_12_4 7
  3798. #define M9_12_5 8
  3799. #define M9_12_6 0
  3800. #define M9_12_7 1
  3801.  
  3802. #define M9_13_0 4
  3803. #define M9_13_1 5
  3804. #define M9_13_2 6
  3805. #define M9_13_3 7
  3806. #define M9_13_4 8
  3807. #define M9_13_5 0
  3808. #define M9_13_6 1
  3809. #define M9_13_7 2
  3810.  
  3811. #define M9_14_0 5
  3812. #define M9_14_1 6
  3813. #define M9_14_2 7
  3814. #define M9_14_3 8
  3815. #define M9_14_4 0
  3816. #define M9_14_5 1
  3817. #define M9_14_6 2
  3818. #define M9_14_7 3
  3819.  
  3820. #define M9_15_0 6
  3821. #define M9_15_1 7
  3822. #define M9_15_2 8
  3823. #define M9_15_3 0
  3824. #define M9_15_4 1
  3825. #define M9_15_5 2
  3826. #define M9_15_6 3
  3827. #define M9_15_7 4
  3828.  
  3829. #define M9_16_0 7
  3830. #define M9_16_1 8
  3831. #define M9_16_2 0
  3832. #define M9_16_3 1
  3833. #define M9_16_4 2
  3834. #define M9_16_5 3
  3835. #define M9_16_6 4
  3836. #define M9_16_7 5
  3837.  
  3838. #define M9_17_0 8
  3839. #define M9_17_1 0
  3840. #define M9_17_2 1
  3841. #define M9_17_3 2
  3842. #define M9_17_4 3
  3843. #define M9_17_5 4
  3844. #define M9_17_6 5
  3845. #define M9_17_7 6
  3846.  
  3847. #define M9_18_0 0
  3848. #define M9_18_1 1
  3849. #define M9_18_2 2
  3850. #define M9_18_3 3
  3851. #define M9_18_4 4
  3852. #define M9_18_5 5
  3853. #define M9_18_6 6
  3854. #define M9_18_7 7
  3855.  
  3856. /*
  3857. * M3_ ## s ## _ ## i evaluates to s+i mod 3 (0 <= s <= 18, 0 <= i <= 1).
  3858. */
  3859.  
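/* For example, M3_16_1 below is 2, since (16 + 1) mod 3 = 2. */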
  3860. #define M3_0_0 0
  3861. #define M3_0_1 1
  3862. #define M3_1_0 1
  3863. #define M3_1_1 2
  3864. #define M3_2_0 2
  3865. #define M3_2_1 0
  3866. #define M3_3_0 0
  3867. #define M3_3_1 1
  3868. #define M3_4_0 1
  3869. #define M3_4_1 2
  3870. #define M3_5_0 2
  3871. #define M3_5_1 0
  3872. #define M3_6_0 0
  3873. #define M3_6_1 1
  3874. #define M3_7_0 1
  3875. #define M3_7_1 2
  3876. #define M3_8_0 2
  3877. #define M3_8_1 0
  3878. #define M3_9_0 0
  3879. #define M3_9_1 1
  3880. #define M3_10_0 1
  3881. #define M3_10_1 2
  3882. #define M3_11_0 2
  3883. #define M3_11_1 0
  3884. #define M3_12_0 0
  3885. #define M3_12_1 1
  3886. #define M3_13_0 1
  3887. #define M3_13_1 2
  3888. #define M3_14_0 2
  3889. #define M3_14_1 0
  3890. #define M3_15_0 0
  3891. #define M3_15_1 1
  3892. #define M3_16_0 1
  3893. #define M3_16_1 2
  3894. #define M3_17_0 2
  3895. #define M3_17_1 0
  3896. #define M3_18_0 0
  3897. #define M3_18_1 1
  3898.  
  3899. #define XCAT(x, y) XCAT_(x, y)
  3900. #define XCAT_(x, y) x ## y
  3901.  
  3902. #define SKBI(k, s, i) XCAT(k, XCAT(XCAT(XCAT(M9_, s), _), i))
  3903. #define SKBT(t, s, v) XCAT(t, XCAT(XCAT(XCAT(M3_, s), _), v))
  3904.  
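/*
 * Worked expansion: SKBI(h, 12, 5) pastes to XCAT(h, M9_12_5), i.e. h8,
 * so at round index s the key schedule picks key word (s + i) mod 9;
 * SKBT(t, 12, 1) likewise pastes to t1 via M3_12_1.
 */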
  3905. #define TFBIG_KINIT(k0, k1, k2, k3, k4, k5, k6, k7, k8, t0, t1, t2) do { \
  3906. k8 = ((k0 ^ k1) ^ (k2 ^ k3)) ^ ((k4 ^ k5) ^ (k6 ^ k7)) \
  3907. ^ SPH_C64(0x1BD11BDAA9FC1A22); \
  3908. t2 = t0 ^ t1; \
  3909. } while (0)
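/*
 * TFBIG_KINIT derives the ninth (parity) key word as the XOR of the eight
 * key words with the Threefish key-schedule constant 0x1BD11BDAA9FC1A22,
 * and the third tweak word as t0 ^ t1.
 */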
  3910.  
  3911. #define TFBIG_ADDKEY(w0, w1, w2, w3, w4, w5, w6, w7, k, t, s) do { \
  3912. w0 = SPH_T64(w0 + SKBI(k, s, 0)); \
  3913. w1 = SPH_T64(w1 + SKBI(k, s, 1)); \
  3914. w2 = SPH_T64(w2 + SKBI(k, s, 2)); \
  3915. w3 = SPH_T64(w3 + SKBI(k, s, 3)); \
  3916. w4 = SPH_T64(w4 + SKBI(k, s, 4)); \
  3917. w5 = SPH_T64(w5 + SKBI(k, s, 5) + SKBT(t, s, 0)); \
  3918. w6 = SPH_T64(w6 + SKBI(k, s, 6) + SKBT(t, s, 1)); \
  3919. w7 = SPH_T64(w7 + SKBI(k, s, 7) + (sph_u64)s); \
  3920. } while (0)
  3921.  
  3922. #define TFBIG_MIX(x0, x1, rc) do { \
  3923. x0 = SPH_T64(x0 + x1); \
  3924. x1 = SPH_ROTL64(x1, rc) ^ x0; \
  3925. } while (0)
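/*
 * TFBIG_MIX is the Threefish MIX step: a 64-bit add of x1 into x0, then
 * x1 is rotated left by rc and XORed with the new x0.
 */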
  3926.  
  3927. #define TFBIG_MIX8(w0, w1, w2, w3, w4, w5, w6, w7, rc0, rc1, rc2, rc3) do { \
  3928. TFBIG_MIX(w0, w1, rc0); \
  3929. TFBIG_MIX(w2, w3, rc1); \
  3930. TFBIG_MIX(w4, w5, rc2); \
  3931. TFBIG_MIX(w6, w7, rc3); \
  3932. } while (0)
  3933.  
  3934. #define TFBIG_4e(s) do { \
  3935. TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
  3936. TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 46, 36, 19, 37); \
  3937. TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 33, 27, 14, 42); \
  3938. TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 17, 49, 36, 39); \
  3939. TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 44, 9, 54, 56); \
  3940. } while (0)
  3941.  
  3942. #define TFBIG_4o(s) do { \
  3943. TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, s); \
  3944. TFBIG_MIX8(p0, p1, p2, p3, p4, p5, p6, p7, 39, 30, 34, 24); \
  3945. TFBIG_MIX8(p2, p1, p4, p7, p6, p5, p0, p3, 13, 50, 10, 17); \
  3946. TFBIG_MIX8(p4, p1, p6, p3, p0, p5, p2, p7, 25, 29, 39, 43); \
  3947. TFBIG_MIX8(p6, p1, p0, p7, p2, p5, p4, p3, 8, 35, 56, 22); \
  3948. } while (0)
  3949.  
  3950. #define UBI_BIG(etype, extra) do { \
  3951. sph_u64 h8, t0, t1, t2; \
  3952. sph_u64 p0 = m0; \
  3953. sph_u64 p1 = m1; \
  3954. sph_u64 p2 = m2; \
  3955. sph_u64 p3 = m3; \
  3956. sph_u64 p4 = m4; \
  3957. sph_u64 p5 = m5; \
  3958. sph_u64 p6 = m6; \
  3959. sph_u64 p7 = m7; \
  3960. t0 = SPH_T64(bcount << 6) + (sph_u64)(extra); \
  3961. t1 = (bcount >> 58) + ((sph_u64)(etype) << 55); \
  3962. TFBIG_KINIT(h0, h1, h2, h3, h4, h5, h6, h7, h8, t0, t1, t2); \
  3963. TFBIG_4e(0); \
  3964. TFBIG_4o(1); \
  3965. TFBIG_4e(2); \
  3966. TFBIG_4o(3); \
  3967. TFBIG_4e(4); \
  3968. TFBIG_4o(5); \
  3969. TFBIG_4e(6); \
  3970. TFBIG_4o(7); \
  3971. TFBIG_4e(8); \
  3972. TFBIG_4o(9); \
  3973. TFBIG_4e(10); \
  3974. TFBIG_4o(11); \
  3975. TFBIG_4e(12); \
  3976. TFBIG_4o(13); \
  3977. TFBIG_4e(14); \
  3978. TFBIG_4o(15); \
  3979. TFBIG_4e(16); \
  3980. TFBIG_4o(17); \
  3981. TFBIG_ADDKEY(p0, p1, p2, p3, p4, p5, p6, p7, h, t, 18); \
  3982. h0 = m0 ^ p0; \
  3983. h1 = m1 ^ p1; \
  3984. h2 = m2 ^ p2; \
  3985. h3 = m3 ^ p3; \
  3986. h4 = m4 ^ p4; \
  3987. h5 = m5 ^ p5; \
  3988. h6 = m6 ^ p6; \
  3989. h7 = m7 ^ p7; \
  3990. } while (0)
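/*
 * UBI_BIG processes one 512-bit Threefish block: the tweak is built from
 * the byte count (t0 = 64*bcount + extra) and the block-type field (etype,
 * placed in the top bits of t1); the eighteen TFBIG_4e/TFBIG_4o groups give
 * the 72 rounds with their subkey additions, the final TFBIG_ADDKEY is the
 * 19th subkey, and the result is fed forward into the chaining value
 * (h = m ^ p).
 */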
  3991.  
  3992. __constant const sph_u64 SKEIN_IV512[] = {
  3993. SPH_C64(0x4903ADFF749C51CE), SPH_C64(0x0D95DE399746DF03),
  3994. SPH_C64(0x8FD1934127C79BCE), SPH_C64(0x9A255629FF352CB1),
  3995. SPH_C64(0x5DB62599DF6CA7B0), SPH_C64(0xEABE394CA9D5C3F4),
  3996. SPH_C64(0x991112C71A75B523), SPH_C64(0xAE18A40B660FCC33)
  3997. };
  3998.  
  3999. /* $Id: luffa.c 219 2010-06-08 17:24:41Z tp $ */
  4000. /*
  4001. * Luffa implementation.
  4002. *
  4003. * ==========================(LICENSE BEGIN)============================
  4004. *
  4005. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  4006. *
  4007. * Permission is hereby granted, free of charge, to any person obtaining
  4008. * a copy of this software and associated documentation files (the
  4009. * "Software"), to deal in the Software without restriction, including
  4010. * without limitation the rights to use, copy, modify, merge, publish,
  4011. * distribute, sublicense, and/or sell copies of the Software, and to
  4012. * permit persons to whom the Software is furnished to do so, subject to
  4013. * the following conditions:
  4014. *
  4015. * The above copyright notice and this permission notice shall be
  4016. * included in all copies or substantial portions of the Software.
  4017. *
  4018. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  4019. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  4020. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  4021. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  4022. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  4023. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  4024. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  4025. *
  4026. * ===========================(LICENSE END)=============================
  4027. *
  4028. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  4029. */
  4030.  
  4031. #ifdef __cplusplus
  4032. extern "C"{
  4033. #endif
  4034.  
  4035. #if SPH_64_TRUE && !defined SPH_LUFFA_PARALLEL
  4036. #define SPH_LUFFA_PARALLEL 1
  4037. #endif
  4038.  
  4039. __constant const sph_u32 V_INIT[5][8] = {
  4040. {
  4041. SPH_C32(0x6d251e69), SPH_C32(0x44b051e0),
  4042. SPH_C32(0x4eaa6fb4), SPH_C32(0xdbf78465),
  4043. SPH_C32(0x6e292011), SPH_C32(0x90152df4),
  4044. SPH_C32(0xee058139), SPH_C32(0xdef610bb)
  4045. }, {
  4046. SPH_C32(0xc3b44b95), SPH_C32(0xd9d2f256),
  4047. SPH_C32(0x70eee9a0), SPH_C32(0xde099fa3),
  4048. SPH_C32(0x5d9b0557), SPH_C32(0x8fc944b3),
  4049. SPH_C32(0xcf1ccf0e), SPH_C32(0x746cd581)
  4050. }, {
  4051. SPH_C32(0xf7efc89d), SPH_C32(0x5dba5781),
  4052. SPH_C32(0x04016ce5), SPH_C32(0xad659c05),
  4053. SPH_C32(0x0306194f), SPH_C32(0x666d1836),
  4054. SPH_C32(0x24aa230a), SPH_C32(0x8b264ae7)
  4055. }, {
  4056. SPH_C32(0x858075d5), SPH_C32(0x36d79cce),
  4057. SPH_C32(0xe571f7d7), SPH_C32(0x204b1f67),
  4058. SPH_C32(0x35870c6a), SPH_C32(0x57e9e923),
  4059. SPH_C32(0x14bcb808), SPH_C32(0x7cde72ce)
  4060. }, {
  4061. SPH_C32(0x6c68e9be), SPH_C32(0x5ec41e22),
  4062. SPH_C32(0xc825b7c7), SPH_C32(0xaffb4363),
  4063. SPH_C32(0xf5df3999), SPH_C32(0x0fc688f1),
  4064. SPH_C32(0xb07224cc), SPH_C32(0x03e86cea)
  4065. }
  4066. };
  4067.  
  4068. __constant const sph_u32 RC00[8] = {
  4069. SPH_C32(0x303994a6), SPH_C32(0xc0e65299),
  4070. SPH_C32(0x6cc33a12), SPH_C32(0xdc56983e),
  4071. SPH_C32(0x1e00108f), SPH_C32(0x7800423d),
  4072. SPH_C32(0x8f5b7882), SPH_C32(0x96e1db12)
  4073. };
  4074.  
  4075. __constant const sph_u32 RC04[8] = {
  4076. SPH_C32(0xe0337818), SPH_C32(0x441ba90d),
  4077. SPH_C32(0x7f34d442), SPH_C32(0x9389217f),
  4078. SPH_C32(0xe5a8bce6), SPH_C32(0x5274baf4),
  4079. SPH_C32(0x26889ba7), SPH_C32(0x9a226e9d)
  4080. };
  4081.  
  4082. __constant const sph_u32 RC10[8] = {
  4083. SPH_C32(0xb6de10ed), SPH_C32(0x70f47aae),
  4084. SPH_C32(0x0707a3d4), SPH_C32(0x1c1e8f51),
  4085. SPH_C32(0x707a3d45), SPH_C32(0xaeb28562),
  4086. SPH_C32(0xbaca1589), SPH_C32(0x40a46f3e)
  4087. };
  4088.  
  4089. __constant const sph_u32 RC14[8] = {
  4090. SPH_C32(0x01685f3d), SPH_C32(0x05a17cf4),
  4091. SPH_C32(0xbd09caca), SPH_C32(0xf4272b28),
  4092. SPH_C32(0x144ae5cc), SPH_C32(0xfaa7ae2b),
  4093. SPH_C32(0x2e48f1c1), SPH_C32(0xb923c704)
  4094. };
  4095.  
  4096. #if SPH_LUFFA_PARALLEL
  4097.  
  4098. __constant const sph_u64 RCW010[8] = {
  4099. SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
  4100. SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
  4101. SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
  4102. SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
  4103. };
  4104.  
  4105. __constant const sph_u64 RCW014[8] = {
  4106. SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
  4107. SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
  4108. SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
  4109. SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
  4110. };
  4111.  
  4112. #endif
  4113.  
  4114. __constant const sph_u32 RC20[8] = {
  4115. SPH_C32(0xfc20d9d2), SPH_C32(0x34552e25),
  4116. SPH_C32(0x7ad8818f), SPH_C32(0x8438764a),
  4117. SPH_C32(0xbb6de032), SPH_C32(0xedb780c8),
  4118. SPH_C32(0xd9847356), SPH_C32(0xa2c78434)
  4119. };
  4120.  
  4121. __constant const sph_u32 RC24[8] = {
  4122. SPH_C32(0xe25e72c1), SPH_C32(0xe623bb72),
  4123. SPH_C32(0x5c58a4a4), SPH_C32(0x1e38e2e7),
  4124. SPH_C32(0x78e38b9d), SPH_C32(0x27586719),
  4125. SPH_C32(0x36eda57f), SPH_C32(0x703aace7)
  4126. };
  4127.  
  4128. __constant const sph_u32 RC30[8] = {
  4129. SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
  4130. SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
  4131. SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
  4132. SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
  4133. };
  4134.  
  4135. __constant const sph_u32 RC34[8] = {
  4136. SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
  4137. SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
  4138. SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
  4139. SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
  4140. };
  4141.  
  4142. #if SPH_LUFFA_PARALLEL
  4143.  
  4144. __constant const sph_u64 RCW230[8] = {
  4145. SPH_C64(0xb213afa5fc20d9d2), SPH_C64(0xc84ebe9534552e25),
  4146. SPH_C64(0x4e608a227ad8818f), SPH_C64(0x56d858fe8438764a),
  4147. SPH_C64(0x343b138fbb6de032), SPH_C64(0xd0ec4e3dedb780c8),
  4148. SPH_C64(0x2ceb4882d9847356), SPH_C64(0xb3ad2208a2c78434)
  4149. };
  4150.  

  4152. __constant const sph_u64 RCW234[8] = {
  4153. SPH_C64(0xe028c9bfe25e72c1), SPH_C64(0x44756f91e623bb72),
  4154. SPH_C64(0x7e8fce325c58a4a4), SPH_C64(0x956548be1e38e2e7),
  4155. SPH_C64(0xfe191be278e38b9d), SPH_C64(0x3cb226e527586719),
  4156. SPH_C64(0x5944a28e36eda57f), SPH_C64(0xa1c4c355703aace7)
  4157. };
  4158.  
  4159. #endif
  4160.  
  4161. __constant const sph_u32 RC40[8] = {
  4162. SPH_C32(0xf0d2e9e3), SPH_C32(0xac11d7fa),
  4163. SPH_C32(0x1bcb66f2), SPH_C32(0x6f2d9bc9),
  4164. SPH_C32(0x78602649), SPH_C32(0x8edae952),
  4165. SPH_C32(0x3b6ba548), SPH_C32(0xedae9520)
  4166. };
  4167.  
  4168. __constant const sph_u32 RC44[8] = {
  4169. SPH_C32(0x5090d577), SPH_C32(0x2d1925ab),
  4170. SPH_C32(0xb46496ac), SPH_C32(0xd1925ab0),
  4171. SPH_C32(0x29131ab6), SPH_C32(0x0fc053c3),
  4172. SPH_C32(0x3f014f0c), SPH_C32(0xfc053c31)
  4173. };
  4174.  
  4175. #define DECL_TMP8(w) \
  4176. sph_u32 w ## 0, w ## 1, w ## 2, w ## 3, w ## 4, w ## 5, w ## 6, w ## 7;
  4177.  
  4178. #define M2(d, s) do { \
  4179. sph_u32 tmp = s ## 7; \
  4180. d ## 7 = s ## 6; \
  4181. d ## 6 = s ## 5; \
  4182. d ## 5 = s ## 4; \
  4183. d ## 4 = s ## 3 ^ tmp; \
  4184. d ## 3 = s ## 2 ^ tmp; \
  4185. d ## 2 = s ## 1; \
  4186. d ## 1 = s ## 0 ^ tmp; \
  4187. d ## 0 = tmp; \
  4188. } while (0)
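/*
 * M2 shifts the eight 32-bit words of its source up by one position; the
 * word shifted out at the top becomes the new word 0 and is also XORed into
 * words 1, 3 and 4, i.e. multiplication by x modulo
 * x^8 + x^4 + x^3 + x + 1 over 32-bit words (Luffa's message ring).
 */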
  4189.  
  4190. #define XOR(d, s1, s2) do { \
  4191. d ## 0 = s1 ## 0 ^ s2 ## 0; \
  4192. d ## 1 = s1 ## 1 ^ s2 ## 1; \
  4193. d ## 2 = s1 ## 2 ^ s2 ## 2; \
  4194. d ## 3 = s1 ## 3 ^ s2 ## 3; \
  4195. d ## 4 = s1 ## 4 ^ s2 ## 4; \
  4196. d ## 5 = s1 ## 5 ^ s2 ## 5; \
  4197. d ## 6 = s1 ## 6 ^ s2 ## 6; \
  4198. d ## 7 = s1 ## 7 ^ s2 ## 7; \
  4199. } while (0)
  4200.  
  4201. #if SPH_LUFFA_PARALLEL
  4202.  
  4203. #define SUB_CRUMB_GEN(a0, a1, a2, a3, width) do { \
  4204. sph_u ## width tmp; \
  4205. tmp = (a0); \
  4206. (a0) |= (a1); \
  4207. (a2) ^= (a3); \
  4208. (a1) = SPH_T ## width(~(a1)); \
  4209. (a0) ^= (a3); \
  4210. (a3) &= tmp; \
  4211. (a1) ^= (a3); \
  4212. (a3) ^= (a2); \
  4213. (a2) &= (a0); \
  4214. (a0) = SPH_T ## width(~(a0)); \
  4215. (a2) ^= (a1); \
  4216. (a1) |= (a3); \
  4217. tmp ^= (a1); \
  4218. (a3) ^= (a2); \
  4219. (a2) &= (a1); \
  4220. (a1) ^= (a0); \
  4221. (a0) = tmp; \
  4222. } while (0)
  4223.  
  4224. #define SUB_CRUMB(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 32)
  4225. #define SUB_CRUMBW(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 64)
  4226.  
  4227. #define MIX_WORDW(u, v) do { \
  4228. sph_u32 ul, uh, vl, vh; \
  4229. (v) ^= (u); \
  4230. ul = SPH_T32((sph_u32)(u)); \
  4231. uh = SPH_T32((sph_u32)((u) >> 32)); \
  4232. vl = SPH_T32((sph_u32)(v)); \
  4233. vh = SPH_T32((sph_u32)((v) >> 32)); \
  4234. ul = SPH_ROTL32(ul, 2) ^ vl; \
  4235. vl = SPH_ROTL32(vl, 14) ^ ul; \
  4236. ul = SPH_ROTL32(ul, 10) ^ vl; \
  4237. vl = SPH_ROTL32(vl, 1); \
  4238. uh = SPH_ROTL32(uh, 2) ^ vh; \
  4239. vh = SPH_ROTL32(vh, 14) ^ uh; \
  4240. uh = SPH_ROTL32(uh, 10) ^ vh; \
  4241. vh = SPH_ROTL32(vh, 1); \
  4242. (u) = (sph_u64)ul | ((sph_u64)uh << 32); \
  4243. (v) = (sph_u64)vl | ((sph_u64)vh << 32); \
  4244. } while (0)
  4245.  
  4246. #else
  4247.  
  4248. #define SUB_CRUMB(a0, a1, a2, a3) do { \
  4249. sph_u32 tmp; \
  4250. tmp = (a0); \
  4251. (a0) |= (a1); \
  4252. (a2) ^= (a3); \
  4253. (a1) = SPH_T32(~(a1)); \
  4254. (a0) ^= (a3); \
  4255. (a3) &= tmp; \
  4256. (a1) ^= (a3); \
  4257. (a3) ^= (a2); \
  4258. (a2) &= (a0); \
  4259. (a0) = SPH_T32(~(a0)); \
  4260. (a2) ^= (a1); \
  4261. (a1) |= (a3); \
  4262. tmp ^= (a1); \
  4263. (a3) ^= (a2); \
  4264. (a2) &= (a1); \
  4265. (a1) ^= (a0); \
  4266. (a0) = tmp; \
  4267. } while (0)
  4268.  
  4269. #endif
  4270.  
  4271. #define MIX_WORD(u, v) do { \
  4272. (v) ^= (u); \
  4273. (u) = SPH_ROTL32((u), 2) ^ (v); \
  4274. (v) = SPH_ROTL32((v), 14) ^ (u); \
  4275. (u) = SPH_ROTL32((u), 10) ^ (v); \
  4276. (v) = SPH_ROTL32((v), 1); \
  4277. } while (0)
  4278.  
  4279. #define MI5 do { \
  4280. DECL_TMP8(a) \
  4281. DECL_TMP8(b) \
  4282. XOR(a, V0, V1); \
  4283. XOR(b, V2, V3); \
  4284. XOR(a, a, b); \
  4285. XOR(a, a, V4); \
  4286. M2(a, a); \
  4287. XOR(V0, a, V0); \
  4288. XOR(V1, a, V1); \
  4289. XOR(V2, a, V2); \
  4290. XOR(V3, a, V3); \
  4291. XOR(V4, a, V4); \
  4292. M2(b, V0); \
  4293. XOR(b, b, V1); \
  4294. M2(V1, V1); \
  4295. XOR(V1, V1, V2); \
  4296. M2(V2, V2); \
  4297. XOR(V2, V2, V3); \
  4298. M2(V3, V3); \
  4299. XOR(V3, V3, V4); \
  4300. M2(V4, V4); \
  4301. XOR(V4, V4, V0); \
  4302. M2(V0, b); \
  4303. XOR(V0, V0, V4); \
  4304. M2(V4, V4); \
  4305. XOR(V4, V4, V3); \
  4306. M2(V3, V3); \
  4307. XOR(V3, V3, V2); \
  4308. M2(V2, V2); \
  4309. XOR(V2, V2, V1); \
  4310. M2(V1, V1); \
  4311. XOR(V1, V1, b); \
  4312. XOR(V0, V0, M); \
  4313. M2(M, M); \
  4314. XOR(V1, V1, M); \
  4315. M2(M, M); \
  4316. XOR(V2, V2, M); \
  4317. M2(M, M); \
  4318. XOR(V3, V3, M); \
  4319. M2(M, M); \
  4320. XOR(V4, V4, M); \
  4321. } while (0)
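/*
 * MI5 is Luffa's five-lane message injection: it XORs all five chaining
 * lanes together, multiplies the sum by x (M2) and feeds it back into every
 * lane, runs a forward and then a backward chain of M2/XOR steps across the
 * lanes, and finally injects the message block, with lane i receiving
 * M multiplied by x^i.
 */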
  4322.  
  4323. #define TWEAK5 do { \
  4324. V14 = SPH_ROTL32(V14, 1); \
  4325. V15 = SPH_ROTL32(V15, 1); \
  4326. V16 = SPH_ROTL32(V16, 1); \
  4327. V17 = SPH_ROTL32(V17, 1); \
  4328. V24 = SPH_ROTL32(V24, 2); \
  4329. V25 = SPH_ROTL32(V25, 2); \
  4330. V26 = SPH_ROTL32(V26, 2); \
  4331. V27 = SPH_ROTL32(V27, 2); \
  4332. V34 = SPH_ROTL32(V34, 3); \
  4333. V35 = SPH_ROTL32(V35, 3); \
  4334. V36 = SPH_ROTL32(V36, 3); \
  4335. V37 = SPH_ROTL32(V37, 3); \
  4336. V44 = SPH_ROTL32(V44, 4); \
  4337. V45 = SPH_ROTL32(V45, 4); \
  4338. V46 = SPH_ROTL32(V46, 4); \
  4339. V47 = SPH_ROTL32(V47, 4); \
  4340. } while (0)
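/*
 * TWEAK5 rotates words 4..7 of lane i (i = 1..4) left by i bits, the
 * per-lane tweak applied before the permutation; lane 0 is left untouched.
 */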
  4341.  
  4342. #if SPH_LUFFA_PARALLEL
  4343.  
  4344. #define LUFFA_P5 do { \
  4345. int r; \
  4346. sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
  4347. TWEAK5; \
  4348. W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
  4349. W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
  4350. W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
  4351. W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
  4352. W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
  4353. W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
  4354. W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
  4355. W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
  4356. for (r = 0; r < 8; r ++) { \
  4357. SUB_CRUMBW(W0, W1, W2, W3); \
  4358. SUB_CRUMBW(W5, W6, W7, W4); \
  4359. MIX_WORDW(W0, W4); \
  4360. MIX_WORDW(W1, W5); \
  4361. MIX_WORDW(W2, W6); \
  4362. MIX_WORDW(W3, W7); \
  4363. W0 ^= RCW010[r]; \
  4364. W4 ^= RCW014[r]; \
  4365. } \
  4366. V00 = SPH_T32((sph_u32)W0); \
  4367. V10 = SPH_T32((sph_u32)(W0 >> 32)); \
  4368. V01 = SPH_T32((sph_u32)W1); \
  4369. V11 = SPH_T32((sph_u32)(W1 >> 32)); \
  4370. V02 = SPH_T32((sph_u32)W2); \
  4371. V12 = SPH_T32((sph_u32)(W2 >> 32)); \
  4372. V03 = SPH_T32((sph_u32)W3); \
  4373. V13 = SPH_T32((sph_u32)(W3 >> 32)); \
  4374. V04 = SPH_T32((sph_u32)W4); \
  4375. V14 = SPH_T32((sph_u32)(W4 >> 32)); \
  4376. V05 = SPH_T32((sph_u32)W5); \
  4377. V15 = SPH_T32((sph_u32)(W5 >> 32)); \
  4378. V06 = SPH_T32((sph_u32)W6); \
  4379. V16 = SPH_T32((sph_u32)(W6 >> 32)); \
  4380. V07 = SPH_T32((sph_u32)W7); \
  4381. V17 = SPH_T32((sph_u32)(W7 >> 32)); \
  4382. W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \
  4383. W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \
  4384. W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \
  4385. W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \
  4386. W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \
  4387. W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \
  4388. W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \
  4389. W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \
  4390. for (r = 0; r < 8; r ++) { \
  4391. SUB_CRUMBW(W0, W1, W2, W3); \
  4392. SUB_CRUMBW(W5, W6, W7, W4); \
  4393. MIX_WORDW(W0, W4); \
  4394. MIX_WORDW(W1, W5); \
  4395. MIX_WORDW(W2, W6); \
  4396. MIX_WORDW(W3, W7); \
  4397. W0 ^= RCW230[r]; \
  4398. W4 ^= RCW234[r]; \
  4399. } \
  4400. V20 = SPH_T32((sph_u32)W0); \
  4401. V30 = SPH_T32((sph_u32)(W0 >> 32)); \
  4402. V21 = SPH_T32((sph_u32)W1); \
  4403. V31 = SPH_T32((sph_u32)(W1 >> 32)); \
  4404. V22 = SPH_T32((sph_u32)W2); \
  4405. V32 = SPH_T32((sph_u32)(W2 >> 32)); \
  4406. V23 = SPH_T32((sph_u32)W3); \
  4407. V33 = SPH_T32((sph_u32)(W3 >> 32)); \
  4408. V24 = SPH_T32((sph_u32)W4); \
  4409. V34 = SPH_T32((sph_u32)(W4 >> 32)); \
  4410. V25 = SPH_T32((sph_u32)W5); \
  4411. V35 = SPH_T32((sph_u32)(W5 >> 32)); \
  4412. V26 = SPH_T32((sph_u32)W6); \
  4413. V36 = SPH_T32((sph_u32)(W6 >> 32)); \
  4414. V27 = SPH_T32((sph_u32)W7); \
  4415. V37 = SPH_T32((sph_u32)(W7 >> 32)); \
  4416. for (r = 0; r < 8; r ++) { \
  4417. SUB_CRUMB(V40, V41, V42, V43); \
  4418. SUB_CRUMB(V45, V46, V47, V44); \
  4419. MIX_WORD(V40, V44); \
  4420. MIX_WORD(V41, V45); \
  4421. MIX_WORD(V42, V46); \
  4422. MIX_WORD(V43, V47); \
  4423. V40 ^= RC40[r]; \
  4424. V44 ^= RC44[r]; \
  4425. } \
  4426. } while (0)
  4427.  
  4428. #else
  4429.  
  4430. #define LUFFA_P5 do { \
  4431. int r; \
  4432. TWEAK5; \
  4433. for (r = 0; r < 8; r ++) { \
  4434. SUB_CRUMB(V00, V01, V02, V03); \
  4435. SUB_CRUMB(V05, V06, V07, V04); \
  4436. MIX_WORD(V00, V04); \
  4437. MIX_WORD(V01, V05); \
  4438. MIX_WORD(V02, V06); \
  4439. MIX_WORD(V03, V07); \
  4440. V00 ^= RC00[r]; \
  4441. V04 ^= RC04[r]; \
  4442. } \
  4443. for (r = 0; r < 8; r ++) { \
  4444. SUB_CRUMB(V10, V11, V12, V13); \
  4445. SUB_CRUMB(V15, V16, V17, V14); \
  4446. MIX_WORD(V10, V14); \
  4447. MIX_WORD(V11, V15); \
  4448. MIX_WORD(V12, V16); \
  4449. MIX_WORD(V13, V17); \
  4450. V10 ^= RC10[r]; \
  4451. V14 ^= RC14[r]; \
  4452. } \
  4453. for (r = 0; r < 8; r ++) { \
  4454. SUB_CRUMB(V20, V21, V22, V23); \
  4455. SUB_CRUMB(V25, V26, V27, V24); \
  4456. MIX_WORD(V20, V24); \
  4457. MIX_WORD(V21, V25); \
  4458. MIX_WORD(V22, V26); \
  4459. MIX_WORD(V23, V27); \
  4460. V20 ^= RC20[r]; \
  4461. V24 ^= RC24[r]; \
  4462. } \
  4463. for (r = 0; r < 8; r ++) { \
  4464. SUB_CRUMB(V30, V31, V32, V33); \
  4465. SUB_CRUMB(V35, V36, V37, V34); \
  4466. MIX_WORD(V30, V34); \
  4467. MIX_WORD(V31, V35); \
  4468. MIX_WORD(V32, V36); \
  4469. MIX_WORD(V33, V37); \
  4470. V30 ^= RC30[r]; \
  4471. V34 ^= RC34[r]; \
  4472. } \
  4473. for (r = 0; r < 8; r ++) { \
  4474. SUB_CRUMB(V40, V41, V42, V43); \
  4475. SUB_CRUMB(V45, V46, V47, V44); \
  4476. MIX_WORD(V40, V44); \
  4477. MIX_WORD(V41, V45); \
  4478. MIX_WORD(V42, V46); \
  4479. MIX_WORD(V43, V47); \
  4480. V40 ^= RC40[r]; \
  4481. V44 ^= RC44[r]; \
  4482. } \
  4483. } while (0)
  4484.  
  4485. #endif
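/*
 * LUFFA_P5 applies the tweak and then eight rounds of the step function to
 * each of the five lanes. In the SPH_LUFFA_PARALLEL variant, lanes 0/1 and
 * 2/3 are packed two-per-64-bit-word (using the RCW packed round constants)
 * so their rounds run in parallel, while the fifth lane uses the plain
 * 32-bit code path.
 */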
  4486.  
  4487. /* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
  4488. /*
  4489. * CubeHash implementation.
  4490. *
  4491. * ==========================(LICENSE BEGIN)============================
  4492. *
  4493. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  4494. *
  4495. * Permission is hereby granted, free of charge, to any person obtaining
  4496. * a copy of this software and associated documentation files (the
  4497. * "Software"), to deal in the Software without restriction, including
  4498. * without limitation the rights to use, copy, modify, merge, publish,
  4499. * distribute, sublicense, and/or sell copies of the Software, and to
  4500. * permit persons to whom the Software is furnished to do so, subject to
  4501. * the following conditions:
  4502. *
  4503. * The above copyright notice and this permission notice shall be
  4504. * included in all copies or substantial portions of the Software.
  4505. *
  4506. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  4507. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  4508. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  4509. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  4510. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  4511. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  4512. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  4513. *
  4514. * ===========================(LICENSE END)=============================
  4515. *
  4516. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  4517. */
  4518.  
  4519. /*
  4520. * Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
  4521. * mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
  4522. * It appears that the optimal settings are:
  4523. * -- full unroll, no state copy on the "big" systems (x86, PowerPC)
  4524. * -- unroll to 4 or 8, state copy on the "small" system (MIPS)
  4525. */
  4526.  
  4527. #if !defined SPH_CUBEHASH_UNROLL
  4528. #define SPH_CUBEHASH_UNROLL 0
  4529. #endif
  4530.  
  4531. __constant const sph_u32 CUBEHASH_IV512[] = {
  4532. SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
  4533. SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
  4534. SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
  4535. SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
  4536. SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
  4537. SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
  4538. SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
  4539. SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
  4540. SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
  4541. SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
  4542. SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
  4543. };
  4544.  
  4545. #define T32 SPH_T32
  4546. #define ROTL32 SPH_ROTL32
  4547.  
  4548. #define ROUND_EVEN do { \
  4549. xg = T32(x0 + xg); \
  4550. x0 = ROTL32(x0, 7); \
  4551. xh = T32(x1 + xh); \
  4552. x1 = ROTL32(x1, 7); \
  4553. xi = T32(x2 + xi); \
  4554. x2 = ROTL32(x2, 7); \
  4555. xj = T32(x3 + xj); \
  4556. x3 = ROTL32(x3, 7); \
  4557. xk = T32(x4 + xk); \
  4558. x4 = ROTL32(x4, 7); \
  4559. xl = T32(x5 + xl); \
  4560. x5 = ROTL32(x5, 7); \
  4561. xm = T32(x6 + xm); \
  4562. x6 = ROTL32(x6, 7); \
  4563. xn = T32(x7 + xn); \
  4564. x7 = ROTL32(x7, 7); \
  4565. xo = T32(x8 + xo); \
  4566. x8 = ROTL32(x8, 7); \
  4567. xp = T32(x9 + xp); \
  4568. x9 = ROTL32(x9, 7); \
  4569. xq = T32(xa + xq); \
  4570. xa = ROTL32(xa, 7); \
  4571. xr = T32(xb + xr); \
  4572. xb = ROTL32(xb, 7); \
  4573. xs = T32(xc + xs); \
  4574. xc = ROTL32(xc, 7); \
  4575. xt = T32(xd + xt); \
  4576. xd = ROTL32(xd, 7); \
  4577. xu = T32(xe + xu); \
  4578. xe = ROTL32(xe, 7); \
  4579. xv = T32(xf + xv); \
  4580. xf = ROTL32(xf, 7); \
  4581. x8 ^= xg; \
  4582. x9 ^= xh; \
  4583. xa ^= xi; \
  4584. xb ^= xj; \
  4585. xc ^= xk; \
  4586. xd ^= xl; \
  4587. xe ^= xm; \
  4588. xf ^= xn; \
  4589. x0 ^= xo; \
  4590. x1 ^= xp; \
  4591. x2 ^= xq; \
  4592. x3 ^= xr; \
  4593. x4 ^= xs; \
  4594. x5 ^= xt; \
  4595. x6 ^= xu; \
  4596. x7 ^= xv; \
  4597. xi = T32(x8 + xi); \
  4598. x8 = ROTL32(x8, 11); \
  4599. xj = T32(x9 + xj); \
  4600. x9 = ROTL32(x9, 11); \
  4601. xg = T32(xa + xg); \
  4602. xa = ROTL32(xa, 11); \
  4603. xh = T32(xb + xh); \
  4604. xb = ROTL32(xb, 11); \
  4605. xm = T32(xc + xm); \
  4606. xc = ROTL32(xc, 11); \
  4607. xn = T32(xd + xn); \
  4608. xd = ROTL32(xd, 11); \
  4609. xk = T32(xe + xk); \
  4610. xe = ROTL32(xe, 11); \
  4611. xl = T32(xf + xl); \
  4612. xf = ROTL32(xf, 11); \
  4613. xq = T32(x0 + xq); \
  4614. x0 = ROTL32(x0, 11); \
  4615. xr = T32(x1 + xr); \
  4616. x1 = ROTL32(x1, 11); \
  4617. xo = T32(x2 + xo); \
  4618. x2 = ROTL32(x2, 11); \
  4619. xp = T32(x3 + xp); \
  4620. x3 = ROTL32(x3, 11); \
  4621. xu = T32(x4 + xu); \
  4622. x4 = ROTL32(x4, 11); \
  4623. xv = T32(x5 + xv); \
  4624. x5 = ROTL32(x5, 11); \
  4625. xs = T32(x6 + xs); \
  4626. x6 = ROTL32(x6, 11); \
  4627. xt = T32(x7 + xt); \
  4628. x7 = ROTL32(x7, 11); \
  4629. xc ^= xi; \
  4630. xd ^= xj; \
  4631. xe ^= xg; \
  4632. xf ^= xh; \
  4633. x8 ^= xm; \
  4634. x9 ^= xn; \
  4635. xa ^= xk; \
  4636. xb ^= xl; \
  4637. x4 ^= xq; \
  4638. x5 ^= xr; \
  4639. x6 ^= xo; \
  4640. x7 ^= xp; \
  4641. x0 ^= xu; \
  4642. x1 ^= xv; \
  4643. x2 ^= xs; \
  4644. x3 ^= xt; \
  4645. } while (0)
  4646.  
  4647. #define ROUND_ODD do { \
  4648. xj = T32(xc + xj); \
  4649. xc = ROTL32(xc, 7); \
  4650. xi = T32(xd + xi); \
  4651. xd = ROTL32(xd, 7); \
  4652. xh = T32(xe + xh); \
  4653. xe = ROTL32(xe, 7); \
  4654. xg = T32(xf + xg); \
  4655. xf = ROTL32(xf, 7); \
  4656. xn = T32(x8 + xn); \
  4657. x8 = ROTL32(x8, 7); \
  4658. xm = T32(x9 + xm); \
  4659. x9 = ROTL32(x9, 7); \
  4660. xl = T32(xa + xl); \
  4661. xa = ROTL32(xa, 7); \
  4662. xk = T32(xb + xk); \
  4663. xb = ROTL32(xb, 7); \
  4664. xr = T32(x4 + xr); \
  4665. x4 = ROTL32(x4, 7); \
  4666. xq = T32(x5 + xq); \
  4667. x5 = ROTL32(x5, 7); \
  4668. xp = T32(x6 + xp); \
  4669. x6 = ROTL32(x6, 7); \
  4670. xo = T32(x7 + xo); \
  4671. x7 = ROTL32(x7, 7); \
  4672. xv = T32(x0 + xv); \
  4673. x0 = ROTL32(x0, 7); \
  4674. xu = T32(x1 + xu); \
  4675. x1 = ROTL32(x1, 7); \
  4676. xt = T32(x2 + xt); \
  4677. x2 = ROTL32(x2, 7); \
  4678. xs = T32(x3 + xs); \
  4679. x3 = ROTL32(x3, 7); \
  4680. x4 ^= xj; \
  4681. x5 ^= xi; \
  4682. x6 ^= xh; \
  4683. x7 ^= xg; \
  4684. x0 ^= xn; \
  4685. x1 ^= xm; \
  4686. x2 ^= xl; \
  4687. x3 ^= xk; \
  4688. xc ^= xr; \
  4689. xd ^= xq; \
  4690. xe ^= xp; \
  4691. xf ^= xo; \
  4692. x8 ^= xv; \
  4693. x9 ^= xu; \
  4694. xa ^= xt; \
  4695. xb ^= xs; \
  4696. xh = T32(x4 + xh); \
  4697. x4 = ROTL32(x4, 11); \
  4698. xg = T32(x5 + xg); \
  4699. x5 = ROTL32(x5, 11); \
  4700. xj = T32(x6 + xj); \
  4701. x6 = ROTL32(x6, 11); \
  4702. xi = T32(x7 + xi); \
  4703. x7 = ROTL32(x7, 11); \
  4704. xl = T32(x0 + xl); \
  4705. x0 = ROTL32(x0, 11); \
  4706. xk = T32(x1 + xk); \
  4707. x1 = ROTL32(x1, 11); \
  4708. xn = T32(x2 + xn); \
  4709. x2 = ROTL32(x2, 11); \
  4710. xm = T32(x3 + xm); \
  4711. x3 = ROTL32(x3, 11); \
  4712. xp = T32(xc + xp); \
  4713. xc = ROTL32(xc, 11); \
  4714. xo = T32(xd + xo); \
  4715. xd = ROTL32(xd, 11); \
  4716. xr = T32(xe + xr); \
  4717. xe = ROTL32(xe, 11); \
  4718. xq = T32(xf + xq); \
  4719. xf = ROTL32(xf, 11); \
  4720. xt = T32(x8 + xt); \
  4721. x8 = ROTL32(x8, 11); \
  4722. xs = T32(x9 + xs); \
  4723. x9 = ROTL32(x9, 11); \
  4724. xv = T32(xa + xv); \
  4725. xa = ROTL32(xa, 11); \
  4726. xu = T32(xb + xu); \
  4727. xb = ROTL32(xb, 11); \
  4728. x0 ^= xh; \
  4729. x1 ^= xg; \
  4730. x2 ^= xj; \
  4731. x3 ^= xi; \
  4732. x4 ^= xl; \
  4733. x5 ^= xk; \
  4734. x6 ^= xn; \
  4735. x7 ^= xm; \
  4736. x8 ^= xp; \
  4737. x9 ^= xo; \
  4738. xa ^= xr; \
  4739. xb ^= xq; \
  4740. xc ^= xt; \
  4741. xd ^= xs; \
  4742. xe ^= xv; \
  4743. xf ^= xu; \
  4744. } while (0)
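/*
 * ROUND_EVEN and ROUND_ODD each implement one full CubeHash round (32-bit
 * adds, rotations by 7 and 11, word swaps and XORs); ROUND_ODD is the same
 * round written on the swapped word order, so a pair of them returns the
 * state to the original naming.
 */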
  4745.  
  4746. /*
  4747. * There is no need to unroll all 16 rounds. The word-swapping permutation
  4748. * is an involution, so we need to unroll an even number of rounds. On
  4749. * "big" systems, unrolling 4 rounds yields about 97% of the speed
  4750. * achieved with full unrolling; and it keeps the code more compact
  4751. * for small architectures.
  4752. */
  4753.  
  4754. #if SPH_CUBEHASH_UNROLL == 2
  4755.  
  4756. #define SIXTEEN_ROUNDS do { \
  4757. int j; \
  4758. for (j = 0; j < 8; j ++) { \
  4759. ROUND_EVEN; \
  4760. ROUND_ODD; \
  4761. } \
  4762. } while (0)
  4763.  
  4764. #elif SPH_CUBEHASH_UNROLL == 4
  4765.  
  4766. #define SIXTEEN_ROUNDS do { \
  4767. int j; \
  4768. for (j = 0; j < 4; j ++) { \
  4769. ROUND_EVEN; \
  4770. ROUND_ODD; \
  4771. ROUND_EVEN; \
  4772. ROUND_ODD; \
  4773. } \
  4774. } while (0)
  4775.  
  4776. #elif SPH_CUBEHASH_UNROLL == 8
  4777.  
  4778. #define SIXTEEN_ROUNDS do { \
  4779. int j; \
  4780. for (j = 0; j < 2; j ++) { \
  4781. ROUND_EVEN; \
  4782. ROUND_ODD; \
  4783. ROUND_EVEN; \
  4784. ROUND_ODD; \
  4785. ROUND_EVEN; \
  4786. ROUND_ODD; \
  4787. ROUND_EVEN; \
  4788. ROUND_ODD; \
  4789. } \
  4790. } while (0)
  4791.  
  4792. #else
  4793.  
  4794. #define SIXTEEN_ROUNDS do { \
  4795. ROUND_EVEN; \
  4796. ROUND_ODD; \
  4797. ROUND_EVEN; \
  4798. ROUND_ODD; \
  4799. ROUND_EVEN; \
  4800. ROUND_ODD; \
  4801. ROUND_EVEN; \
  4802. ROUND_ODD; \
  4803. ROUND_EVEN; \
  4804. ROUND_ODD; \
  4805. ROUND_EVEN; \
  4806. ROUND_ODD; \
  4807. ROUND_EVEN; \
  4808. ROUND_ODD; \
  4809. ROUND_EVEN; \
  4810. ROUND_ODD; \
  4811. } while (0)
  4812.  
  4813. #endif
  4814.  
  4815. /* $Id: shavite.c 227 2010-06-16 17:28:38Z tp $ */
  4816. /*
  4817. * SHAvite-3 implementation.
  4818. *
  4819. * ==========================(LICENSE BEGIN)============================
  4820. *
  4821. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  4822. *
  4823. * Permission is hereby granted, free of charge, to any person obtaining
  4824. * a copy of this software and associated documentation files (the
  4825. * "Software"), to deal in the Software without restriction, including
  4826. * without limitation the rights to use, copy, modify, merge, publish,
  4827. * distribute, sublicense, and/or sell copies of the Software, and to
  4828. * permit persons to whom the Software is furnished to do so, subject to
  4829. * the following conditions:
  4830. *
  4831. * The above copyright notice and this permission notice shall be
  4832. * included in all copies or substantial portions of the Software.
  4833. *
  4834. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  4835. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  4836. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  4837. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  4838. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  4839. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  4840. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  4841. *
  4842. * ===========================(LICENSE END)=============================
  4843. *
  4844. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  4845. */
  4846.  
  4847. /*
  4848. * As of round 2 of the SHA-3 competition, the published reference
  4849. * implementation and test vectors are wrong, because they use
  4850. * big-endian AES tables while the internal decoding uses little-endian.
  4851. * The code below follows the specification. To turn it into a code
  4852. * which follows the reference implementation (the one called "BugFix"
  4853. * on the SHAvite-3 web site, published on Nov 23rd, 2009), comment out
  4854. * the code below (from the '#define AES_BIG_ENDIAN...' to the definition
  4855. * of the AES_ROUND_NOKEY macro) and replace it with the version which
  4856. * is commented out afterwards.
  4857. */
  4858.  
  4859. #define AES_BIG_ENDIAN 0
  4860.  
  4861. #define AES_ROUND_NOKEY(x0, x1, x2, x3) do { \
  4862. sph_u32 t0 = (x0); \
  4863. sph_u32 t1 = (x1); \
  4864. sph_u32 t2 = (x2); \
  4865. sph_u32 t3 = (x3); \
  4866. AES_ROUND_NOKEY_LE(t0, t1, t2, t3, x0, x1, x2, x3); \
  4867. } while (0)
  4868.  
  4869. #define KEY_EXPAND_ELT(k0, k1, k2, k3) do { \
  4870. sph_u32 kt; \
  4871. AES_ROUND_NOKEY(k1, k2, k3, k0); \
  4872. kt = (k0); \
  4873. (k0) = (k1); \
  4874. (k1) = (k2); \
  4875. (k2) = (k3); \
  4876. (k3) = kt; \
  4877. } while (0)
  4878.  
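/*
 * AES_ROUND_NOKEY copies its arguments to temporaries so that the outputs
 * of AES_ROUND_NOKEY_LE can be written back over the inputs. KEY_EXPAND_ELT
 * feeds the words through one such keyless AES round in rotated order
 * (k1, k2, k3, k0) and stores the result back as (k0, k1, k2, k3), the
 * nonlinear step of SHAvite-3's round-key expansion.
 */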
  4879. /*
  4880. * This function assumes that "msg" is aligned for 32-bit access.
  4881. */
  4882. #define c512(msg) do { \
  4883. sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \
  4884. sph_u32 p8, p9, pA, pB, pC, pD, pE, pF; \
  4885. sph_u32 x0, x1, x2, x3; \
  4886. int r; \
  4887. \
  4888. p0 = h0; \
  4889. p1 = h1; \
  4890. p2 = h2; \
  4891. p3 = h3; \
  4892. p4 = h4; \
  4893. p5 = h5; \
  4894. p6 = h6; \
  4895. p7 = h7; \
  4896. p8 = h8; \
  4897. p9 = h9; \
  4898. pA = hA; \
  4899. pB = hB; \
  4900. pC = hC; \
  4901. pD = hD; \
  4902. pE = hE; \
  4903. pF = hF; \
  4904. /* round 0 */ \
  4905. x0 = p4 ^ rk00; \
  4906. x1 = p5 ^ rk01; \
  4907. x2 = p6 ^ rk02; \
  4908. x3 = p7 ^ rk03; \
  4909. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4910. x0 ^= rk04; \
  4911. x1 ^= rk05; \
  4912. x2 ^= rk06; \
  4913. x3 ^= rk07; \
  4914. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4915. x0 ^= rk08; \
  4916. x1 ^= rk09; \
  4917. x2 ^= rk0A; \
  4918. x3 ^= rk0B; \
  4919. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4920. x0 ^= rk0C; \
  4921. x1 ^= rk0D; \
  4922. x2 ^= rk0E; \
  4923. x3 ^= rk0F; \
  4924. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4925. p0 ^= x0; \
  4926. p1 ^= x1; \
  4927. p2 ^= x2; \
  4928. p3 ^= x3; \
  4929. x0 = pC ^ rk10; \
  4930. x1 = pD ^ rk11; \
  4931. x2 = pE ^ rk12; \
  4932. x3 = pF ^ rk13; \
  4933. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4934. x0 ^= rk14; \
  4935. x1 ^= rk15; \
  4936. x2 ^= rk16; \
  4937. x3 ^= rk17; \
  4938. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4939. x0 ^= rk18; \
  4940. x1 ^= rk19; \
  4941. x2 ^= rk1A; \
  4942. x3 ^= rk1B; \
  4943. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4944. x0 ^= rk1C; \
  4945. x1 ^= rk1D; \
  4946. x2 ^= rk1E; \
  4947. x3 ^= rk1F; \
  4948. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4949. p8 ^= x0; \
  4950. p9 ^= x1; \
  4951. pA ^= x2; \
  4952. pB ^= x3; \
  4953. \
  4954. for (r = 0; r < 3; r ++) { \
  4955. /* round 1, 5, 9 */ \
  4956. KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
  4957. rk00 ^= rk1C; \
  4958. rk01 ^= rk1D; \
  4959. rk02 ^= rk1E; \
  4960. rk03 ^= rk1F; \
  4961. if (r == 0) { \
  4962. rk00 ^= sc_count0; \
  4963. rk01 ^= sc_count1; \
  4964. rk02 ^= sc_count2; \
  4965. rk03 ^= SPH_T32(~sc_count3); \
  4966. } \
  4967. x0 = p0 ^ rk00; \
  4968. x1 = p1 ^ rk01; \
  4969. x2 = p2 ^ rk02; \
  4970. x3 = p3 ^ rk03; \
  4971. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4972. KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
  4973. rk04 ^= rk00; \
  4974. rk05 ^= rk01; \
  4975. rk06 ^= rk02; \
  4976. rk07 ^= rk03; \
  4977. if (r == 1) { \
  4978. rk04 ^= sc_count3; \
  4979. rk05 ^= sc_count2; \
  4980. rk06 ^= sc_count1; \
  4981. rk07 ^= SPH_T32(~sc_count0); \
  4982. } \
  4983. x0 ^= rk04; \
  4984. x1 ^= rk05; \
  4985. x2 ^= rk06; \
  4986. x3 ^= rk07; \
  4987. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4988. KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
  4989. rk08 ^= rk04; \
  4990. rk09 ^= rk05; \
  4991. rk0A ^= rk06; \
  4992. rk0B ^= rk07; \
  4993. x0 ^= rk08; \
  4994. x1 ^= rk09; \
  4995. x2 ^= rk0A; \
  4996. x3 ^= rk0B; \
  4997. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  4998. KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
  4999. rk0C ^= rk08; \
  5000. rk0D ^= rk09; \
  5001. rk0E ^= rk0A; \
  5002. rk0F ^= rk0B; \
  5003. x0 ^= rk0C; \
  5004. x1 ^= rk0D; \
  5005. x2 ^= rk0E; \
  5006. x3 ^= rk0F; \
  5007. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5008. pC ^= x0; \
  5009. pD ^= x1; \
  5010. pE ^= x2; \
  5011. pF ^= x3; \
  5012. KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
  5013. rk10 ^= rk0C; \
  5014. rk11 ^= rk0D; \
  5015. rk12 ^= rk0E; \
  5016. rk13 ^= rk0F; \
  5017. x0 = p8 ^ rk10; \
  5018. x1 = p9 ^ rk11; \
  5019. x2 = pA ^ rk12; \
  5020. x3 = pB ^ rk13; \
  5021. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5022. KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
  5023. rk14 ^= rk10; \
  5024. rk15 ^= rk11; \
  5025. rk16 ^= rk12; \
  5026. rk17 ^= rk13; \
  5027. x0 ^= rk14; \
  5028. x1 ^= rk15; \
  5029. x2 ^= rk16; \
  5030. x3 ^= rk17; \
  5031. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5032. KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
  5033. rk18 ^= rk14; \
  5034. rk19 ^= rk15; \
  5035. rk1A ^= rk16; \
  5036. rk1B ^= rk17; \
  5037. x0 ^= rk18; \
  5038. x1 ^= rk19; \
  5039. x2 ^= rk1A; \
  5040. x3 ^= rk1B; \
  5041. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5042. KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
  5043. rk1C ^= rk18; \
  5044. rk1D ^= rk19; \
  5045. rk1E ^= rk1A; \
  5046. rk1F ^= rk1B; \
  5047. if (r == 2) { \
  5048. rk1C ^= sc_count2; \
  5049. rk1D ^= sc_count3; \
  5050. rk1E ^= sc_count0; \
  5051. rk1F ^= SPH_T32(~sc_count1); \
  5052. } \
  5053. x0 ^= rk1C; \
  5054. x1 ^= rk1D; \
  5055. x2 ^= rk1E; \
  5056. x3 ^= rk1F; \
  5057. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5058. p4 ^= x0; \
  5059. p5 ^= x1; \
  5060. p6 ^= x2; \
  5061. p7 ^= x3; \
  5062. /* round 2, 6, 10 */ \
  5063. rk00 ^= rk19; \
  5064. x0 = pC ^ rk00; \
  5065. rk01 ^= rk1A; \
  5066. x1 = pD ^ rk01; \
  5067. rk02 ^= rk1B; \
  5068. x2 = pE ^ rk02; \
  5069. rk03 ^= rk1C; \
  5070. x3 = pF ^ rk03; \
  5071. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5072. rk04 ^= rk1D; \
  5073. x0 ^= rk04; \
  5074. rk05 ^= rk1E; \
  5075. x1 ^= rk05; \
  5076. rk06 ^= rk1F; \
  5077. x2 ^= rk06; \
  5078. rk07 ^= rk00; \
  5079. x3 ^= rk07; \
  5080. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5081. rk08 ^= rk01; \
  5082. x0 ^= rk08; \
  5083. rk09 ^= rk02; \
  5084. x1 ^= rk09; \
  5085. rk0A ^= rk03; \
  5086. x2 ^= rk0A; \
  5087. rk0B ^= rk04; \
  5088. x3 ^= rk0B; \
  5089. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5090. rk0C ^= rk05; \
  5091. x0 ^= rk0C; \
  5092. rk0D ^= rk06; \
  5093. x1 ^= rk0D; \
  5094. rk0E ^= rk07; \
  5095. x2 ^= rk0E; \
  5096. rk0F ^= rk08; \
  5097. x3 ^= rk0F; \
  5098. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5099. p8 ^= x0; \
  5100. p9 ^= x1; \
  5101. pA ^= x2; \
  5102. pB ^= x3; \
  5103. rk10 ^= rk09; \
  5104. x0 = p4 ^ rk10; \
  5105. rk11 ^= rk0A; \
  5106. x1 = p5 ^ rk11; \
  5107. rk12 ^= rk0B; \
  5108. x2 = p6 ^ rk12; \
  5109. rk13 ^= rk0C; \
  5110. x3 = p7 ^ rk13; \
  5111. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5112. rk14 ^= rk0D; \
  5113. x0 ^= rk14; \
  5114. rk15 ^= rk0E; \
  5115. x1 ^= rk15; \
  5116. rk16 ^= rk0F; \
  5117. x2 ^= rk16; \
  5118. rk17 ^= rk10; \
  5119. x3 ^= rk17; \
  5120. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5121. rk18 ^= rk11; \
  5122. x0 ^= rk18; \
  5123. rk19 ^= rk12; \
  5124. x1 ^= rk19; \
  5125. rk1A ^= rk13; \
  5126. x2 ^= rk1A; \
  5127. rk1B ^= rk14; \
  5128. x3 ^= rk1B; \
  5129. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5130. rk1C ^= rk15; \
  5131. x0 ^= rk1C; \
  5132. rk1D ^= rk16; \
  5133. x1 ^= rk1D; \
  5134. rk1E ^= rk17; \
  5135. x2 ^= rk1E; \
  5136. rk1F ^= rk18; \
  5137. x3 ^= rk1F; \
  5138. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5139. p0 ^= x0; \
  5140. p1 ^= x1; \
  5141. p2 ^= x2; \
  5142. p3 ^= x3; \
  5143. /* round 3, 7, 11 */ \
  5144. KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
  5145. rk00 ^= rk1C; \
  5146. rk01 ^= rk1D; \
  5147. rk02 ^= rk1E; \
  5148. rk03 ^= rk1F; \
  5149. x0 = p8 ^ rk00; \
  5150. x1 = p9 ^ rk01; \
  5151. x2 = pA ^ rk02; \
  5152. x3 = pB ^ rk03; \
  5153. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5154. KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
  5155. rk04 ^= rk00; \
  5156. rk05 ^= rk01; \
  5157. rk06 ^= rk02; \
  5158. rk07 ^= rk03; \
  5159. x0 ^= rk04; \
  5160. x1 ^= rk05; \
  5161. x2 ^= rk06; \
  5162. x3 ^= rk07; \
  5163. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5164. KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
  5165. rk08 ^= rk04; \
  5166. rk09 ^= rk05; \
  5167. rk0A ^= rk06; \
  5168. rk0B ^= rk07; \
  5169. x0 ^= rk08; \
  5170. x1 ^= rk09; \
  5171. x2 ^= rk0A; \
  5172. x3 ^= rk0B; \
  5173. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5174. KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
  5175. rk0C ^= rk08; \
  5176. rk0D ^= rk09; \
  5177. rk0E ^= rk0A; \
  5178. rk0F ^= rk0B; \
  5179. x0 ^= rk0C; \
  5180. x1 ^= rk0D; \
  5181. x2 ^= rk0E; \
  5182. x3 ^= rk0F; \
  5183. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5184. p4 ^= x0; \
  5185. p5 ^= x1; \
  5186. p6 ^= x2; \
  5187. p7 ^= x3; \
  5188. KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
  5189. rk10 ^= rk0C; \
  5190. rk11 ^= rk0D; \
  5191. rk12 ^= rk0E; \
  5192. rk13 ^= rk0F; \
  5193. x0 = p0 ^ rk10; \
  5194. x1 = p1 ^ rk11; \
  5195. x2 = p2 ^ rk12; \
  5196. x3 = p3 ^ rk13; \
  5197. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5198. KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
  5199. rk14 ^= rk10; \
  5200. rk15 ^= rk11; \
  5201. rk16 ^= rk12; \
  5202. rk17 ^= rk13; \
  5203. x0 ^= rk14; \
  5204. x1 ^= rk15; \
  5205. x2 ^= rk16; \
  5206. x3 ^= rk17; \
  5207. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5208. KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
  5209. rk18 ^= rk14; \
  5210. rk19 ^= rk15; \
  5211. rk1A ^= rk16; \
  5212. rk1B ^= rk17; \
  5213. x0 ^= rk18; \
  5214. x1 ^= rk19; \
  5215. x2 ^= rk1A; \
  5216. x3 ^= rk1B; \
  5217. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5218. KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
  5219. rk1C ^= rk18; \
  5220. rk1D ^= rk19; \
  5221. rk1E ^= rk1A; \
  5222. rk1F ^= rk1B; \
  5223. x0 ^= rk1C; \
  5224. x1 ^= rk1D; \
  5225. x2 ^= rk1E; \
  5226. x3 ^= rk1F; \
  5227. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5228. pC ^= x0; \
  5229. pD ^= x1; \
  5230. pE ^= x2; \
  5231. pF ^= x3; \
  5232. /* round 4, 8, 12 */ \
  5233. rk00 ^= rk19; \
  5234. x0 = p4 ^ rk00; \
  5235. rk01 ^= rk1A; \
  5236. x1 = p5 ^ rk01; \
  5237. rk02 ^= rk1B; \
  5238. x2 = p6 ^ rk02; \
  5239. rk03 ^= rk1C; \
  5240. x3 = p7 ^ rk03; \
  5241. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5242. rk04 ^= rk1D; \
  5243. x0 ^= rk04; \
  5244. rk05 ^= rk1E; \
  5245. x1 ^= rk05; \
  5246. rk06 ^= rk1F; \
  5247. x2 ^= rk06; \
  5248. rk07 ^= rk00; \
  5249. x3 ^= rk07; \
  5250. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5251. rk08 ^= rk01; \
  5252. x0 ^= rk08; \
  5253. rk09 ^= rk02; \
  5254. x1 ^= rk09; \
  5255. rk0A ^= rk03; \
  5256. x2 ^= rk0A; \
  5257. rk0B ^= rk04; \
  5258. x3 ^= rk0B; \
  5259. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5260. rk0C ^= rk05; \
  5261. x0 ^= rk0C; \
  5262. rk0D ^= rk06; \
  5263. x1 ^= rk0D; \
  5264. rk0E ^= rk07; \
  5265. x2 ^= rk0E; \
  5266. rk0F ^= rk08; \
  5267. x3 ^= rk0F; \
  5268. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5269. p0 ^= x0; \
  5270. p1 ^= x1; \
  5271. p2 ^= x2; \
  5272. p3 ^= x3; \
  5273. rk10 ^= rk09; \
  5274. x0 = pC ^ rk10; \
  5275. rk11 ^= rk0A; \
  5276. x1 = pD ^ rk11; \
  5277. rk12 ^= rk0B; \
  5278. x2 = pE ^ rk12; \
  5279. rk13 ^= rk0C; \
  5280. x3 = pF ^ rk13; \
  5281. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5282. rk14 ^= rk0D; \
  5283. x0 ^= rk14; \
  5284. rk15 ^= rk0E; \
  5285. x1 ^= rk15; \
  5286. rk16 ^= rk0F; \
  5287. x2 ^= rk16; \
  5288. rk17 ^= rk10; \
  5289. x3 ^= rk17; \
  5290. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5291. rk18 ^= rk11; \
  5292. x0 ^= rk18; \
  5293. rk19 ^= rk12; \
  5294. x1 ^= rk19; \
  5295. rk1A ^= rk13; \
  5296. x2 ^= rk1A; \
  5297. rk1B ^= rk14; \
  5298. x3 ^= rk1B; \
  5299. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5300. rk1C ^= rk15; \
  5301. x0 ^= rk1C; \
  5302. rk1D ^= rk16; \
  5303. x1 ^= rk1D; \
  5304. rk1E ^= rk17; \
  5305. x2 ^= rk1E; \
  5306. rk1F ^= rk18; \
  5307. x3 ^= rk1F; \
  5308. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5309. p8 ^= x0; \
  5310. p9 ^= x1; \
  5311. pA ^= x2; \
  5312. pB ^= x3; \
  5313. } \
  5314. /* round 13 */ \
  5315. KEY_EXPAND_ELT(rk00, rk01, rk02, rk03); \
  5316. rk00 ^= rk1C; \
  5317. rk01 ^= rk1D; \
  5318. rk02 ^= rk1E; \
  5319. rk03 ^= rk1F; \
  5320. x0 = p0 ^ rk00; \
  5321. x1 = p1 ^ rk01; \
  5322. x2 = p2 ^ rk02; \
  5323. x3 = p3 ^ rk03; \
  5324. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5325. KEY_EXPAND_ELT(rk04, rk05, rk06, rk07); \
  5326. rk04 ^= rk00; \
  5327. rk05 ^= rk01; \
  5328. rk06 ^= rk02; \
  5329. rk07 ^= rk03; \
  5330. x0 ^= rk04; \
  5331. x1 ^= rk05; \
  5332. x2 ^= rk06; \
  5333. x3 ^= rk07; \
  5334. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5335. KEY_EXPAND_ELT(rk08, rk09, rk0A, rk0B); \
  5336. rk08 ^= rk04; \
  5337. rk09 ^= rk05; \
  5338. rk0A ^= rk06; \
  5339. rk0B ^= rk07; \
  5340. x0 ^= rk08; \
  5341. x1 ^= rk09; \
  5342. x2 ^= rk0A; \
  5343. x3 ^= rk0B; \
  5344. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5345. KEY_EXPAND_ELT(rk0C, rk0D, rk0E, rk0F); \
  5346. rk0C ^= rk08; \
  5347. rk0D ^= rk09; \
  5348. rk0E ^= rk0A; \
  5349. rk0F ^= rk0B; \
  5350. x0 ^= rk0C; \
  5351. x1 ^= rk0D; \
  5352. x2 ^= rk0E; \
  5353. x3 ^= rk0F; \
  5354. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5355. pC ^= x0; \
  5356. pD ^= x1; \
  5357. pE ^= x2; \
  5358. pF ^= x3; \
  5359. KEY_EXPAND_ELT(rk10, rk11, rk12, rk13); \
  5360. rk10 ^= rk0C; \
  5361. rk11 ^= rk0D; \
  5362. rk12 ^= rk0E; \
  5363. rk13 ^= rk0F; \
  5364. x0 = p8 ^ rk10; \
  5365. x1 = p9 ^ rk11; \
  5366. x2 = pA ^ rk12; \
  5367. x3 = pB ^ rk13; \
  5368. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5369. KEY_EXPAND_ELT(rk14, rk15, rk16, rk17); \
  5370. rk14 ^= rk10; \
  5371. rk15 ^= rk11; \
  5372. rk16 ^= rk12; \
  5373. rk17 ^= rk13; \
  5374. x0 ^= rk14; \
  5375. x1 ^= rk15; \
  5376. x2 ^= rk16; \
  5377. x3 ^= rk17; \
  5378. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5379. KEY_EXPAND_ELT(rk18, rk19, rk1A, rk1B); \
  5380. rk18 ^= rk14 ^ sc_count1; \
  5381. rk19 ^= rk15 ^ sc_count0; \
  5382. rk1A ^= rk16 ^ sc_count3; \
  5383. rk1B ^= rk17 ^ SPH_T32(~sc_count2); \
  5384. x0 ^= rk18; \
  5385. x1 ^= rk19; \
  5386. x2 ^= rk1A; \
  5387. x3 ^= rk1B; \
  5388. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5389. KEY_EXPAND_ELT(rk1C, rk1D, rk1E, rk1F); \
  5390. rk1C ^= rk18; \
  5391. rk1D ^= rk19; \
  5392. rk1E ^= rk1A; \
  5393. rk1F ^= rk1B; \
  5394. x0 ^= rk1C; \
  5395. x1 ^= rk1D; \
  5396. x2 ^= rk1E; \
  5397. x3 ^= rk1F; \
  5398. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5399. p4 ^= x0; \
  5400. p5 ^= x1; \
  5401. p6 ^= x2; \
  5402. p7 ^= x3; \
  5403. h0 ^= p8; \
  5404. h1 ^= p9; \
  5405. h2 ^= pA; \
  5406. h3 ^= pB; \
  5407. h4 ^= pC; \
  5408. h5 ^= pD; \
  5409. h6 ^= pE; \
  5410. h7 ^= pF; \
  5411. h8 ^= p0; \
  5412. h9 ^= p1; \
  5413. hA ^= p2; \
  5414. hB ^= p3; \
  5415. hC ^= p4; \
  5416. hD ^= p5; \
  5417. hE ^= p6; \
  5418. hF ^= p7; \
  5419. } while (0)
  5420.  
  5421. #define c256(msg) do { \
  5422. sph_u32 p0, p1, p2, p3, p4, p5, p6, p7; \
  5423. sph_u32 x0, x1, x2, x3; \
  5424. \
  5425. p0 = h[0x0]; \
  5426. p1 = h[0x1]; \
  5427. p2 = h[0x2]; \
  5428. p3 = h[0x3]; \
  5429. p4 = h[0x4]; \
  5430. p5 = h[0x5]; \
  5431. p6 = h[0x6]; \
  5432. p7 = h[0x7]; \
  5433. /* round 0 */ \
  5434. x0 = p4 ^ rk0; \
  5435. x1 = p5 ^ rk1; \
  5436. x2 = p6 ^ rk2; \
  5437. x3 = p7 ^ rk3; \
  5438. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5439. x0 ^= rk4; \
  5440. x1 ^= rk5; \
  5441. x2 ^= rk6; \
  5442. x3 ^= rk7; \
  5443. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5444. x0 ^= rk8; \
  5445. x1 ^= rk9; \
  5446. x2 ^= rkA; \
  5447. x3 ^= rkB; \
  5448. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5449. p0 ^= x0; \
  5450. p1 ^= x1; \
  5451. p2 ^= x2; \
  5452. p3 ^= x3; \
  5453. /* round 1 */ \
  5454. x0 = p0 ^ rkC; \
  5455. x1 = p1 ^ rkD; \
  5456. x2 = p2 ^ rkE; \
  5457. x3 = p3 ^ rkF; \
  5458. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5459. KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
  5460. rk0 ^= rkC ^ count0; \
  5461. rk1 ^= rkD ^ SPH_T32(~count1); \
  5462. rk2 ^= rkE; \
  5463. rk3 ^= rkF; \
  5464. x0 ^= rk0; \
  5465. x1 ^= rk1; \
  5466. x2 ^= rk2; \
  5467. x3 ^= rk3; \
  5468. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5469. KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
  5470. rk4 ^= rk0; \
  5471. rk5 ^= rk1; \
  5472. rk6 ^= rk2; \
  5473. rk7 ^= rk3; \
  5474. x0 ^= rk4; \
  5475. x1 ^= rk5; \
  5476. x2 ^= rk6; \
  5477. x3 ^= rk7; \
  5478. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5479. p4 ^= x0; \
  5480. p5 ^= x1; \
  5481. p6 ^= x2; \
  5482. p7 ^= x3; \
  5483. /* round 2 */ \
  5484. KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
  5485. rk8 ^= rk4; \
  5486. rk9 ^= rk5; \
  5487. rkA ^= rk6; \
  5488. rkB ^= rk7; \
  5489. x0 = p4 ^ rk8; \
  5490. x1 = p5 ^ rk9; \
  5491. x2 = p6 ^ rkA; \
  5492. x3 = p7 ^ rkB; \
  5493. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5494. KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
  5495. rkC ^= rk8; \
  5496. rkD ^= rk9; \
  5497. rkE ^= rkA; \
  5498. rkF ^= rkB; \
  5499. x0 ^= rkC; \
  5500. x1 ^= rkD; \
  5501. x2 ^= rkE; \
  5502. x3 ^= rkF; \
  5503. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5504. rk0 ^= rkD; \
  5505. x0 ^= rk0; \
  5506. rk1 ^= rkE; \
  5507. x1 ^= rk1; \
  5508. rk2 ^= rkF; \
  5509. x2 ^= rk2; \
  5510. rk3 ^= rk0; \
  5511. x3 ^= rk3; \
  5512. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5513. p0 ^= x0; \
  5514. p1 ^= x1; \
  5515. p2 ^= x2; \
  5516. p3 ^= x3; \
  5517. /* round 3 */ \
  5518. rk4 ^= rk1; \
  5519. x0 = p0 ^ rk4; \
  5520. rk5 ^= rk2; \
  5521. x1 = p1 ^ rk5; \
  5522. rk6 ^= rk3; \
  5523. x2 = p2 ^ rk6; \
  5524. rk7 ^= rk4; \
  5525. x3 = p3 ^ rk7; \
  5526. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5527. rk8 ^= rk5; \
  5528. x0 ^= rk8; \
  5529. rk9 ^= rk6; \
  5530. x1 ^= rk9; \
  5531. rkA ^= rk7; \
  5532. x2 ^= rkA; \
  5533. rkB ^= rk8; \
  5534. x3 ^= rkB; \
  5535. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5536. rkC ^= rk9; \
  5537. x0 ^= rkC; \
  5538. rkD ^= rkA; \
  5539. x1 ^= rkD; \
  5540. rkE ^= rkB; \
  5541. x2 ^= rkE; \
  5542. rkF ^= rkC; \
  5543. x3 ^= rkF; \
  5544. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5545. p4 ^= x0; \
  5546. p5 ^= x1; \
  5547. p6 ^= x2; \
  5548. p7 ^= x3; \
  5549. /* round 4 */ \
  5550. KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
  5551. rk0 ^= rkC; \
  5552. rk1 ^= rkD; \
  5553. rk2 ^= rkE; \
  5554. rk3 ^= rkF; \
  5555. x0 = p4 ^ rk0; \
  5556. x1 = p5 ^ rk1; \
  5557. x2 = p6 ^ rk2; \
  5558. x3 = p7 ^ rk3; \
  5559. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5560. KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
  5561. rk4 ^= rk0; \
  5562. rk5 ^= rk1; \
  5563. rk6 ^= rk2; \
  5564. rk7 ^= rk3; \
  5565. x0 ^= rk4; \
  5566. x1 ^= rk5; \
  5567. x2 ^= rk6; \
  5568. x3 ^= rk7; \
  5569. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5570. KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
  5571. rk8 ^= rk4; \
  5572. rk9 ^= rk5 ^ count1; \
  5573. rkA ^= rk6 ^ SPH_T32(~count0); \
  5574. rkB ^= rk7; \
  5575. x0 ^= rk8; \
  5576. x1 ^= rk9; \
  5577. x2 ^= rkA; \
  5578. x3 ^= rkB; \
  5579. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5580. p0 ^= x0; \
  5581. p1 ^= x1; \
  5582. p2 ^= x2; \
  5583. p3 ^= x3; \
  5584. /* round 5 */ \
  5585. KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
  5586. rkC ^= rk8; \
  5587. rkD ^= rk9; \
  5588. rkE ^= rkA; \
  5589. rkF ^= rkB; \
  5590. x0 = p0 ^ rkC; \
  5591. x1 = p1 ^ rkD; \
  5592. x2 = p2 ^ rkE; \
  5593. x3 = p3 ^ rkF; \
  5594. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5595. rk0 ^= rkD; \
  5596. x0 ^= rk0; \
  5597. rk1 ^= rkE; \
  5598. x1 ^= rk1; \
  5599. rk2 ^= rkF; \
  5600. x2 ^= rk2; \
  5601. rk3 ^= rk0; \
  5602. x3 ^= rk3; \
  5603. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5604. rk4 ^= rk1; \
  5605. x0 ^= rk4; \
  5606. rk5 ^= rk2; \
  5607. x1 ^= rk5; \
  5608. rk6 ^= rk3; \
  5609. x2 ^= rk6; \
  5610. rk7 ^= rk4; \
  5611. x3 ^= rk7; \
  5612. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5613. p4 ^= x0; \
  5614. p5 ^= x1; \
  5615. p6 ^= x2; \
  5616. p7 ^= x3; \
  5617. /* round 6 */ \
  5618. rk8 ^= rk5; \
  5619. x0 = p4 ^ rk8; \
  5620. rk9 ^= rk6; \
  5621. x1 = p5 ^ rk9; \
  5622. rkA ^= rk7; \
  5623. x2 = p6 ^ rkA; \
  5624. rkB ^= rk8; \
  5625. x3 = p7 ^ rkB; \
  5626. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5627. rkC ^= rk9; \
  5628. x0 ^= rkC; \
  5629. rkD ^= rkA; \
  5630. x1 ^= rkD; \
  5631. rkE ^= rkB; \
  5632. x2 ^= rkE; \
  5633. rkF ^= rkC; \
  5634. x3 ^= rkF; \
  5635. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5636. KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
  5637. rk0 ^= rkC; \
  5638. rk1 ^= rkD; \
  5639. rk2 ^= rkE; \
  5640. rk3 ^= rkF; \
  5641. x0 ^= rk0; \
  5642. x1 ^= rk1; \
  5643. x2 ^= rk2; \
  5644. x3 ^= rk3; \
  5645. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5646. p0 ^= x0; \
  5647. p1 ^= x1; \
  5648. p2 ^= x2; \
  5649. p3 ^= x3; \
  5650. /* round 7 */ \
  5651. KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
  5652. rk4 ^= rk0; \
  5653. rk5 ^= rk1; \
  5654. rk6 ^= rk2 ^ count1; \
  5655. rk7 ^= rk3 ^ SPH_T32(~count0); \
  5656. x0 = p0 ^ rk4; \
  5657. x1 = p1 ^ rk5; \
  5658. x2 = p2 ^ rk6; \
  5659. x3 = p3 ^ rk7; \
  5660. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5661. KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
  5662. rk8 ^= rk4; \
  5663. rk9 ^= rk5; \
  5664. rkA ^= rk6; \
  5665. rkB ^= rk7; \
  5666. x0 ^= rk8; \
  5667. x1 ^= rk9; \
  5668. x2 ^= rkA; \
  5669. x3 ^= rkB; \
  5670. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5671. KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
  5672. rkC ^= rk8; \
  5673. rkD ^= rk9; \
  5674. rkE ^= rkA; \
  5675. rkF ^= rkB; \
  5676. x0 ^= rkC; \
  5677. x1 ^= rkD; \
  5678. x2 ^= rkE; \
  5679. x3 ^= rkF; \
  5680. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5681. p4 ^= x0; \
  5682. p5 ^= x1; \
  5683. p6 ^= x2; \
  5684. p7 ^= x3; \
  5685. /* round 8 */ \
  5686. rk0 ^= rkD; \
  5687. x0 = p4 ^ rk0; \
  5688. rk1 ^= rkE; \
  5689. x1 = p5 ^ rk1; \
  5690. rk2 ^= rkF; \
  5691. x2 = p6 ^ rk2; \
  5692. rk3 ^= rk0; \
  5693. x3 = p7 ^ rk3; \
  5694. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5695. rk4 ^= rk1; \
  5696. x0 ^= rk4; \
  5697. rk5 ^= rk2; \
  5698. x1 ^= rk5; \
  5699. rk6 ^= rk3; \
  5700. x2 ^= rk6; \
  5701. rk7 ^= rk4; \
  5702. x3 ^= rk7; \
  5703. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5704. rk8 ^= rk5; \
  5705. x0 ^= rk8; \
  5706. rk9 ^= rk6; \
  5707. x1 ^= rk9; \
  5708. rkA ^= rk7; \
  5709. x2 ^= rkA; \
  5710. rkB ^= rk8; \
  5711. x3 ^= rkB; \
  5712. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5713. p0 ^= x0; \
  5714. p1 ^= x1; \
  5715. p2 ^= x2; \
  5716. p3 ^= x3; \
  5717. /* round 9 */ \
  5718. rkC ^= rk9; \
  5719. x0 = p0 ^ rkC; \
  5720. rkD ^= rkA; \
  5721. x1 = p1 ^ rkD; \
  5722. rkE ^= rkB; \
  5723. x2 = p2 ^ rkE; \
  5724. rkF ^= rkC; \
  5725. x3 = p3 ^ rkF; \
  5726. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5727. KEY_EXPAND_ELT(rk0, rk1, rk2, rk3); \
  5728. rk0 ^= rkC; \
  5729. rk1 ^= rkD; \
  5730. rk2 ^= rkE; \
  5731. rk3 ^= rkF; \
  5732. x0 ^= rk0; \
  5733. x1 ^= rk1; \
  5734. x2 ^= rk2; \
  5735. x3 ^= rk3; \
  5736. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5737. KEY_EXPAND_ELT(rk4, rk5, rk6, rk7); \
  5738. rk4 ^= rk0; \
  5739. rk5 ^= rk1; \
  5740. rk6 ^= rk2; \
  5741. rk7 ^= rk3; \
  5742. x0 ^= rk4; \
  5743. x1 ^= rk5; \
  5744. x2 ^= rk6; \
  5745. x3 ^= rk7; \
  5746. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5747. p4 ^= x0; \
  5748. p5 ^= x1; \
  5749. p6 ^= x2; \
  5750. p7 ^= x3; \
  5751. /* round 10 */ \
  5752. KEY_EXPAND_ELT(rk8, rk9, rkA, rkB); \
  5753. rk8 ^= rk4; \
  5754. rk9 ^= rk5; \
  5755. rkA ^= rk6; \
  5756. rkB ^= rk7; \
  5757. x0 = p4 ^ rk8; \
  5758. x1 = p5 ^ rk9; \
  5759. x2 = p6 ^ rkA; \
  5760. x3 = p7 ^ rkB; \
  5761. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5762. KEY_EXPAND_ELT(rkC, rkD, rkE, rkF); \
  5763. rkC ^= rk8 ^ count0; \
  5764. rkD ^= rk9; \
  5765. rkE ^= rkA; \
  5766. rkF ^= rkB ^ SPH_T32(~count1); \
  5767. x0 ^= rkC; \
  5768. x1 ^= rkD; \
  5769. x2 ^= rkE; \
  5770. x3 ^= rkF; \
  5771. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5772. rk0 ^= rkD; \
  5773. x0 ^= rk0; \
  5774. rk1 ^= rkE; \
  5775. x1 ^= rk1; \
  5776. rk2 ^= rkF; \
  5777. x2 ^= rk2; \
  5778. rk3 ^= rk0; \
  5779. x3 ^= rk3; \
  5780. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5781. p0 ^= x0; \
  5782. p1 ^= x1; \
  5783. p2 ^= x2; \
  5784. p3 ^= x3; \
  5785. /* round 11 */ \
  5786. rk4 ^= rk1; \
  5787. x0 = p0 ^ rk4; \
  5788. rk5 ^= rk2; \
  5789. x1 = p1 ^ rk5; \
  5790. rk6 ^= rk3; \
  5791. x2 = p2 ^ rk6; \
  5792. rk7 ^= rk4; \
  5793. x3 = p3 ^ rk7; \
  5794. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5795. rk8 ^= rk5; \
  5796. x0 ^= rk8; \
  5797. rk9 ^= rk6; \
  5798. x1 ^= rk9; \
  5799. rkA ^= rk7; \
  5800. x2 ^= rkA; \
  5801. rkB ^= rk8; \
  5802. x3 ^= rkB; \
  5803. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5804. rkC ^= rk9; \
  5805. x0 ^= rkC; \
  5806. rkD ^= rkA; \
  5807. x1 ^= rkD; \
  5808. rkE ^= rkB; \
  5809. x2 ^= rkE; \
  5810. rkF ^= rkC; \
  5811. x3 ^= rkF; \
  5812. AES_ROUND_NOKEY(x0, x1, x2, x3); \
  5813. p4 ^= x0; \
  5814. p5 ^= x1; \
  5815. p6 ^= x2; \
  5816. p7 ^= x3; \
  5817. h[0x0] ^= p0; \
  5818. h[0x1] ^= p1; \
  5819. h[0x2] ^= p2; \
  5820. h[0x3] ^= p3; \
  5821. h[0x4] ^= p4; \
  5822. h[0x5] ^= p5; \
  5823. h[0x6] ^= p6; \
  5824. h[0x7] ^= p7; \
5825. } while (0)
  5826.  
  5827. /* $Id: simd.c 227 2010-06-16 17:28:38Z tp $ */
  5828. /*
  5829. * SIMD implementation.
  5830. *
  5831. * ==========================(LICENSE BEGIN)============================
  5832. *
  5833. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  5834. *
  5835. * Permission is hereby granted, free of charge, to any person obtaining
  5836. * a copy of this software and associated documentation files (the
  5837. * "Software"), to deal in the Software without restriction, including
  5838. * without limitation the rights to use, copy, modify, merge, publish,
  5839. * distribute, sublicense, and/or sell copies of the Software, and to
  5840. * permit persons to whom the Software is furnished to do so, subject to
  5841. * the following conditions:
  5842. *
  5843. * The above copyright notice and this permission notice shall be
  5844. * included in all copies or substantial portions of the Software.
  5845. *
  5846. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  5847. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  5848. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  5849. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  5850. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  5851. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  5852. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  5853. *
  5854. * ===========================(LICENSE END)=============================
  5855. *
  5856. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  5857. */
  5858.  
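/*
 * SIMD message expansion helpers.  SIMD expands each message block with a
 * number-theoretic transform (an "FFT" over GF(257)): alpha_tab below holds
 * the twiddle factors (powers of 41 modulo 257), REDS1/REDS2 perform cheap
 * partial reductions modulo 257, and the FFT_LOOP_* / FFT8 / FFT16 macros
 * are unrolled butterfly passes of that transform.
 */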
  5859. typedef sph_u32 u32;
  5860. typedef sph_s32 s32;
  5861. #define C32 SPH_C32
  5862. #define T32 SPH_T32
  5863. #define ROL32 SPH_ROTL32
  5864.  
  5865. #define XCAT(x, y) XCAT_(x, y)
  5866. #define XCAT_(x, y) x ## y
  5867.  
  5868. __constant const s32 SIMD_Q[] = {
  5869. 4, 28, -80, -120, -47, -126, 45, -123, -92, -127, -70, 23, -23, -24, 40, -125, 101, 122, 34, -24, -119, 110, -121, -112, 32, 24, 51, 73, -117, -64, -21, 42, -60, 16, 5, 85, 107, 52, -44, -96, 42, 127, -18, -108, -47, 26, 91, 117, 112, 46, 87, 79, 126, -120, 65, -24, 121, 29, 118, -7, -53, 85, -98, -117, 32, 115, -47, -116, 63, 16, -108, 49, -119, 57, -110, 4, -76, -76, -42, -86, 58, 115, 4, 4, -83, -51, -37, 116, 32, 15, 36, -42, 73, -99, 94, 87, 60, -20, 67, 12, -76, 55, 117, -68, -82, -80, 93, -20, 92, -21, -128, -91, -11, 84, -28, 76, 94, -124, 37, 93, 17, -78, -106, -29, 88, -15, -47, 102, -4, -28, 80, 120, 47, 126, -45, 123, 92, 127, 70, -23, 23, 24, -40, 125, -101, -122, -34, 24, 119, -110, 121, 112, -32, -24, -51, -73, 117, 64, 21, -42, 60, -16, -5, -85, -107, -52, 44, 96, -42, -127, 18, 108, 47, -26, -91, -117, -112, -46, -87, -79, -126, 120, -65, 24, -121, -29, -118, 7, 53, -85, 98, 117, -32, -115, 47, 116, -63, -16, 108, -49, 119, -57, 110, -4, 76, 76, 42, 86, -58, -115, -4, -4, 83, 51, 37, -116, -32, -15, -36, 42, -73, 99, -94, -87, -60, 20, -67, -12, 76, -55, -117, 68, 82, 80, -93, 20, -92, 21, 128, 91, 11, -84, 28, -76, -94, 124, -37, -93, -17, 78, 106, 29, -88, 15, 47, -102
  5870. };
  5871.  
  5872. /*
  5873. * The powers of 41 modulo 257. We use exponents from 0 to 255, inclusive.
  5874. */
  5875. __constant const s32 alpha_tab[] = {
  5876. 1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130,
  5877. 190, 80, 196, 69, 2, 82, 21, 90, 92, 174, 195, 28,
  5878. 120, 37, 232, 3, 123, 160, 135, 138, 4, 164, 42, 180,
  5879. 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
  5880. 8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12,
  5881. 235, 126, 26, 38, 16, 142, 168, 206, 222, 107, 18, 224,
  5882. 189, 39, 57, 24, 213, 252, 52, 76, 32, 27, 79, 155,
  5883. 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
  5884. 64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96,
  5885. 81, 237, 208, 47, 128, 108, 59, 106, 234, 85, 144, 250,
  5886. 227, 55, 199, 192, 162, 217, 159, 94, 256, 216, 118, 212,
  5887. 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
  5888. 255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254,
  5889. 134, 97, 122, 119, 253, 93, 215, 77, 73, 166, 124, 201,
  5890. 17, 183, 50, 251, 11, 194, 244, 238, 249, 186, 173, 154,
  5891. 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
  5892. 241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233,
  5893. 44, 5, 205, 181, 225, 230, 178, 102, 70, 43, 221, 66,
  5894. 136, 179, 143, 209, 88, 10, 153, 105, 193, 203, 99, 204,
  5895. 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
  5896. 129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65,
  5897. 95, 40, 98, 163
  5898. };
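/*
 * Note: alpha_tab[i] == 41^i mod 257.  Since 41 generates the multiplicative
 * group of the prime field GF(257), it is a primitive 256-th root of unity
 * there; e.g. alpha_tab[2] == 41 * 41 mod 257 == 139 and alpha_tab[128] ==
 * 256 == -1 mod 257.  Illustration only -- a host-side sketch of how the
 * table could be regenerated (gen_alpha_tab is not part of the kernel):
 */
#if 0
static void gen_alpha_tab(s32 tab[256])
{
    s32 v = 1;
    for (int i = 0; i < 256; i ++) {
        tab[i] = v;            /* 41^i mod 257 */
        v = (v * 41) % 257;
    }
}
#endif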
  5899.  
  5900. /*
  5901. * Ranges:
  5902. * REDS1: from -32768..98302 to -383..383
  5903. * REDS2: from -2^31..2^31-1 to -32768..98302
  5904. */
  5905. #define REDS1(x) (((x) & 0xFF) - ((x) >> 8))
  5906. #define REDS2(x) (((x) & 0xFFFF) + ((x) >> 16))
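/*
 * Both are partial reductions modulo 257: since 2^8 = 256 = -1 (mod 257)
 * and 2^16 = 1 (mod 257), (x & 0xFF) - (x >> 8) and (x & 0xFFFF) + (x >> 16)
 * are each congruent to x modulo 257.  For example REDS1(300) = 44 - 1 = 43
 * = 300 mod 257, and REDS2(70000) = 4464 + 1 = 4465, which like 70000 is
 * congruent to 96 modulo 257.
 */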
  5907.  
  5908. /*
  5909. * If, upon entry, the values of q[] are all in the -N..N range (where
  5910. * N >= 98302) then the new values of q[] are in the -2N..2N range.
  5911. *
  5912. * Since alpha_tab[v] <= 256, maximum allowed range is for N = 8388608.
  5913. */
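/*
 * Each FFT_LOOP_<N>_<S> macro below is one fully unrolled butterfly pass of
 * the NTT modulo 257: for u = 0..N-1 it combines q[rb+u] with q[rb+u+N]
 * using the twiddle factor alpha_tab[u*S] = 41^(u*S) mod 257 (the u = 0
 * butterfly needs no multiplication).  A non-unrolled sketch of the same
 * update, for reference only (FFT_LOOP_REF is not part of the kernel and is
 * not compiled):
 */
#if 0
#define FFT_LOOP_REF(rb, N, S) do { \
    int u; \
    for (u = 0; u < (N); u ++) { \
        s32 m = q[(rb) + u]; \
        s32 t = (u == 0) \
            ? q[(rb) + u + (N)] \
            : REDS2(q[(rb) + u + (N)] * alpha_tab[u * (S)]); \
        q[(rb) + u] = m + t; \
        q[(rb) + u + (N)] = m - t; \
    } \
} while (0)
#endif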
  5914.  
  5915. #define FFT_LOOP_16_8(rb) do { \
  5916. s32 m = q[(rb)]; \
  5917. s32 n = q[(rb) + 16]; \
  5918. q[(rb)] = m + n; \
  5919. q[(rb) + 16] = m - n; \
  5920. s32 t; \
  5921. m = q[(rb) + 0 + 1]; \
  5922. n = q[(rb) + 0 + 1 + 16]; \
  5923. t = REDS2(n * alpha_tab[0 + 1 * 8]); \
  5924. q[(rb) + 0 + 1] = m + t; \
  5925. q[(rb) + 0 + 1 + 16] = m - t; \
  5926. m = q[(rb) + 0 + 2]; \
  5927. n = q[(rb) + 0 + 2 + 16]; \
  5928. t = REDS2(n * alpha_tab[0 + 2 * 8]); \
  5929. q[(rb) + 0 + 2] = m + t; \
  5930. q[(rb) + 0 + 2 + 16] = m - t; \
  5931. m = q[(rb) + 0 + 3]; \
  5932. n = q[(rb) + 0 + 3 + 16]; \
  5933. t = REDS2(n * alpha_tab[0 + 3 * 8]); \
  5934. q[(rb) + 0 + 3] = m + t; \
  5935. q[(rb) + 0 + 3 + 16] = m - t; \
  5936. \
  5937. m = q[(rb) + 4 + 0]; \
  5938. n = q[(rb) + 4 + 0 + 16]; \
  5939. t = REDS2(n * alpha_tab[32 + 0 * 8]); \
  5940. q[(rb) + 4 + 0] = m + t; \
  5941. q[(rb) + 4 + 0 + 16] = m - t; \
  5942. m = q[(rb) + 4 + 1]; \
  5943. n = q[(rb) + 4 + 1 + 16]; \
  5944. t = REDS2(n * alpha_tab[32 + 1 * 8]); \
  5945. q[(rb) + 4 + 1] = m + t; \
  5946. q[(rb) + 4 + 1 + 16] = m - t; \
  5947. m = q[(rb) + 4 + 2]; \
  5948. n = q[(rb) + 4 + 2 + 16]; \
  5949. t = REDS2(n * alpha_tab[32 + 2 * 8]); \
  5950. q[(rb) + 4 + 2] = m + t; \
  5951. q[(rb) + 4 + 2 + 16] = m - t; \
  5952. m = q[(rb) + 4 + 3]; \
  5953. n = q[(rb) + 4 + 3 + 16]; \
  5954. t = REDS2(n * alpha_tab[32 + 3 * 8]); \
  5955. q[(rb) + 4 + 3] = m + t; \
  5956. q[(rb) + 4 + 3 + 16] = m - t; \
  5957. \
  5958. m = q[(rb) + 8 + 0]; \
  5959. n = q[(rb) + 8 + 0 + 16]; \
  5960. t = REDS2(n * alpha_tab[64 + 0 * 8]); \
  5961. q[(rb) + 8 + 0] = m + t; \
  5962. q[(rb) + 8 + 0 + 16] = m - t; \
  5963. m = q[(rb) + 8 + 1]; \
  5964. n = q[(rb) + 8 + 1 + 16]; \
  5965. t = REDS2(n * alpha_tab[64 + 1 * 8]); \
  5966. q[(rb) + 8 + 1] = m + t; \
  5967. q[(rb) + 8 + 1 + 16] = m - t; \
  5968. m = q[(rb) + 8 + 2]; \
  5969. n = q[(rb) + 8 + 2 + 16]; \
  5970. t = REDS2(n * alpha_tab[64 + 2 * 8]); \
  5971. q[(rb) + 8 + 2] = m + t; \
  5972. q[(rb) + 8 + 2 + 16] = m - t; \
  5973. m = q[(rb) + 8 + 3]; \
  5974. n = q[(rb) + 8 + 3 + 16]; \
  5975. t = REDS2(n * alpha_tab[64 + 3 * 8]); \
  5976. q[(rb) + 8 + 3] = m + t; \
  5977. q[(rb) + 8 + 3 + 16] = m - t; \
  5978. \
  5979. m = q[(rb) + 12 + 0]; \
  5980. n = q[(rb) + 12 + 0 + 16]; \
  5981. t = REDS2(n * alpha_tab[96 + 0 * 8]); \
  5982. q[(rb) + 12 + 0] = m + t; \
  5983. q[(rb) + 12 + 0 + 16] = m - t; \
  5984. m = q[(rb) + 12 + 1]; \
  5985. n = q[(rb) + 12 + 1 + 16]; \
  5986. t = REDS2(n * alpha_tab[96 + 1 * 8]); \
  5987. q[(rb) + 12 + 1] = m + t; \
  5988. q[(rb) + 12 + 1 + 16] = m - t; \
  5989. m = q[(rb) + 12 + 2]; \
  5990. n = q[(rb) + 12 + 2 + 16]; \
  5991. t = REDS2(n * alpha_tab[96 + 2 * 8]); \
  5992. q[(rb) + 12 + 2] = m + t; \
  5993. q[(rb) + 12 + 2 + 16] = m - t; \
  5994. m = q[(rb) + 12 + 3]; \
  5995. n = q[(rb) + 12 + 3 + 16]; \
  5996. t = REDS2(n * alpha_tab[96 + 3 * 8]); \
  5997. q[(rb) + 12 + 3] = m + t; \
  5998. q[(rb) + 12 + 3 + 16] = m - t; \
  5999. } while (0)
  6000.  
  6001. #define FFT_LOOP_32_4(rb) do { \
  6002. s32 m = q[(rb)]; \
  6003. s32 n = q[(rb) + 32]; \
  6004. q[(rb)] = m + n; \
  6005. q[(rb) + 32] = m - n; \
  6006. s32 t; \
  6007. m = q[(rb) + 0 + 1]; \
  6008. n = q[(rb) + 0 + 1 + 32]; \
  6009. t = REDS2(n * alpha_tab[0 + 1 * 4]); \
  6010. q[(rb) + 0 + 1] = m + t; \
  6011. q[(rb) + 0 + 1 + 32] = m - t; \
  6012. m = q[(rb) + 0 + 2]; \
  6013. n = q[(rb) + 0 + 2 + 32]; \
  6014. t = REDS2(n * alpha_tab[0 + 2 * 4]); \
  6015. q[(rb) + 0 + 2] = m + t; \
  6016. q[(rb) + 0 + 2 + 32] = m - t; \
  6017. m = q[(rb) + 0 + 3]; \
  6018. n = q[(rb) + 0 + 3 + 32]; \
  6019. t = REDS2(n * alpha_tab[0 + 3 * 4]); \
  6020. q[(rb) + 0 + 3] = m + t; \
  6021. q[(rb) + 0 + 3 + 32] = m - t; \
  6022. \
  6023. m = q[(rb) + 4 + 0]; \
  6024. n = q[(rb) + 4 + 0 + 32]; \
  6025. t = REDS2(n * alpha_tab[16 + 0 * 4]); \
  6026. q[(rb) + 4 + 0] = m + t; \
  6027. q[(rb) + 4 + 0 + 32] = m - t; \
  6028. m = q[(rb) + 4 + 1]; \
  6029. n = q[(rb) + 4 + 1 + 32]; \
  6030. t = REDS2(n * alpha_tab[16 + 1 * 4]); \
  6031. q[(rb) + 4 + 1] = m + t; \
  6032. q[(rb) + 4 + 1 + 32] = m - t; \
  6033. m = q[(rb) + 4 + 2]; \
  6034. n = q[(rb) + 4 + 2 + 32]; \
  6035. t = REDS2(n * alpha_tab[16 + 2 * 4]); \
  6036. q[(rb) + 4 + 2] = m + t; \
  6037. q[(rb) + 4 + 2 + 32] = m - t; \
  6038. m = q[(rb) + 4 + 3]; \
  6039. n = q[(rb) + 4 + 3 + 32]; \
  6040. t = REDS2(n * alpha_tab[16 + 3 * 4]); \
  6041. q[(rb) + 4 + 3] = m + t; \
  6042. q[(rb) + 4 + 3 + 32] = m - t; \
  6043. \
  6044. m = q[(rb) + 8 + 0]; \
  6045. n = q[(rb) + 8 + 0 + 32]; \
  6046. t = REDS2(n * alpha_tab[32 + 0 * 4]); \
  6047. q[(rb) + 8 + 0] = m + t; \
  6048. q[(rb) + 8 + 0 + 32] = m - t; \
  6049. m = q[(rb) + 8 + 1]; \
  6050. n = q[(rb) + 8 + 1 + 32]; \
  6051. t = REDS2(n * alpha_tab[32 + 1 * 4]); \
  6052. q[(rb) + 8 + 1] = m + t; \
  6053. q[(rb) + 8 + 1 + 32] = m - t; \
  6054. m = q[(rb) + 8 + 2]; \
  6055. n = q[(rb) + 8 + 2 + 32]; \
  6056. t = REDS2(n * alpha_tab[32 + 2 * 4]); \
  6057. q[(rb) + 8 + 2] = m + t; \
  6058. q[(rb) + 8 + 2 + 32] = m - t; \
  6059. m = q[(rb) + 8 + 3]; \
  6060. n = q[(rb) + 8 + 3 + 32]; \
  6061. t = REDS2(n * alpha_tab[32 + 3 * 4]); \
  6062. q[(rb) + 8 + 3] = m + t; \
  6063. q[(rb) + 8 + 3 + 32] = m - t; \
  6064. \
  6065. m = q[(rb) + 12 + 0]; \
  6066. n = q[(rb) + 12 + 0 + 32]; \
  6067. t = REDS2(n * alpha_tab[48 + 0 * 4]); \
  6068. q[(rb) + 12 + 0] = m + t; \
  6069. q[(rb) + 12 + 0 + 32] = m - t; \
  6070. m = q[(rb) + 12 + 1]; \
  6071. n = q[(rb) + 12 + 1 + 32]; \
  6072. t = REDS2(n * alpha_tab[48 + 1 * 4]); \
  6073. q[(rb) + 12 + 1] = m + t; \
  6074. q[(rb) + 12 + 1 + 32] = m - t; \
  6075. m = q[(rb) + 12 + 2]; \
  6076. n = q[(rb) + 12 + 2 + 32]; \
  6077. t = REDS2(n * alpha_tab[48 + 2 * 4]); \
  6078. q[(rb) + 12 + 2] = m + t; \
  6079. q[(rb) + 12 + 2 + 32] = m - t; \
  6080. m = q[(rb) + 12 + 3]; \
  6081. n = q[(rb) + 12 + 3 + 32]; \
  6082. t = REDS2(n * alpha_tab[48 + 3 * 4]); \
  6083. q[(rb) + 12 + 3] = m + t; \
  6084. q[(rb) + 12 + 3 + 32] = m - t; \
  6085. \
  6086. m = q[(rb) + 16 + 0]; \
  6087. n = q[(rb) + 16 + 0 + 32]; \
  6088. t = REDS2(n * alpha_tab[64 + 0 * 4]); \
  6089. q[(rb) + 16 + 0] = m + t; \
  6090. q[(rb) + 16 + 0 + 32] = m - t; \
  6091. m = q[(rb) + 16 + 1]; \
  6092. n = q[(rb) + 16 + 1 + 32]; \
  6093. t = REDS2(n * alpha_tab[64 + 1 * 4]); \
  6094. q[(rb) + 16 + 1] = m + t; \
  6095. q[(rb) + 16 + 1 + 32] = m - t; \
  6096. m = q[(rb) + 16 + 2]; \
  6097. n = q[(rb) + 16 + 2 + 32]; \
  6098. t = REDS2(n * alpha_tab[64 + 2 * 4]); \
  6099. q[(rb) + 16 + 2] = m + t; \
  6100. q[(rb) + 16 + 2 + 32] = m - t; \
  6101. m = q[(rb) + 16 + 3]; \
  6102. n = q[(rb) + 16 + 3 + 32]; \
  6103. t = REDS2(n * alpha_tab[64 + 3 * 4]); \
  6104. q[(rb) + 16 + 3] = m + t; \
  6105. q[(rb) + 16 + 3 + 32] = m - t; \
  6106. \
  6107. m = q[(rb) + 20 + 0]; \
  6108. n = q[(rb) + 20 + 0 + 32]; \
  6109. t = REDS2(n * alpha_tab[80 + 0 * 4]); \
  6110. q[(rb) + 20 + 0] = m + t; \
  6111. q[(rb) + 20 + 0 + 32] = m - t; \
  6112. m = q[(rb) + 20 + 1]; \
  6113. n = q[(rb) + 20 + 1 + 32]; \
  6114. t = REDS2(n * alpha_tab[80 + 1 * 4]); \
  6115. q[(rb) + 20 + 1] = m + t; \
  6116. q[(rb) + 20 + 1 + 32] = m - t; \
  6117. m = q[(rb) + 20 + 2]; \
  6118. n = q[(rb) + 20 + 2 + 32]; \
  6119. t = REDS2(n * alpha_tab[80 + 2 * 4]); \
  6120. q[(rb) + 20 + 2] = m + t; \
  6121. q[(rb) + 20 + 2 + 32] = m - t; \
  6122. m = q[(rb) + 20 + 3]; \
  6123. n = q[(rb) + 20 + 3 + 32]; \
  6124. t = REDS2(n * alpha_tab[80 + 3 * 4]); \
  6125. q[(rb) + 20 + 3] = m + t; \
  6126. q[(rb) + 20 + 3 + 32] = m - t; \
  6127. \
  6128. m = q[(rb) + 24 + 0]; \
  6129. n = q[(rb) + 24 + 0 + 32]; \
  6130. t = REDS2(n * alpha_tab[96 + 0 * 4]); \
  6131. q[(rb) + 24 + 0] = m + t; \
  6132. q[(rb) + 24 + 0 + 32] = m - t; \
  6133. m = q[(rb) + 24 + 1]; \
  6134. n = q[(rb) + 24 + 1 + 32]; \
  6135. t = REDS2(n * alpha_tab[96 + 1 * 4]); \
  6136. q[(rb) + 24 + 1] = m + t; \
  6137. q[(rb) + 24 + 1 + 32] = m - t; \
  6138. m = q[(rb) + 24 + 2]; \
  6139. n = q[(rb) + 24 + 2 + 32]; \
  6140. t = REDS2(n * alpha_tab[96 + 2 * 4]); \
  6141. q[(rb) + 24 + 2] = m + t; \
  6142. q[(rb) + 24 + 2 + 32] = m - t; \
  6143. m = q[(rb) + 24 + 3]; \
  6144. n = q[(rb) + 24 + 3 + 32]; \
  6145. t = REDS2(n * alpha_tab[96 + 3 * 4]); \
  6146. q[(rb) + 24 + 3] = m + t; \
  6147. q[(rb) + 24 + 3 + 32] = m - t; \
  6148. \
  6149. m = q[(rb) + 28 + 0]; \
  6150. n = q[(rb) + 28 + 0 + 32]; \
  6151. t = REDS2(n * alpha_tab[112 + 0 * 4]); \
  6152. q[(rb) + 28 + 0] = m + t; \
  6153. q[(rb) + 28 + 0 + 32] = m - t; \
  6154. m = q[(rb) + 28 + 1]; \
  6155. n = q[(rb) + 28 + 1 + 32]; \
  6156. t = REDS2(n * alpha_tab[112 + 1 * 4]); \
  6157. q[(rb) + 28 + 1] = m + t; \
  6158. q[(rb) + 28 + 1 + 32] = m - t; \
  6159. m = q[(rb) + 28 + 2]; \
  6160. n = q[(rb) + 28 + 2 + 32]; \
  6161. t = REDS2(n * alpha_tab[112 + 2 * 4]); \
  6162. q[(rb) + 28 + 2] = m + t; \
  6163. q[(rb) + 28 + 2 + 32] = m - t; \
  6164. m = q[(rb) + 28 + 3]; \
  6165. n = q[(rb) + 28 + 3 + 32]; \
  6166. t = REDS2(n * alpha_tab[112 + 3 * 4]); \
  6167. q[(rb) + 28 + 3] = m + t; \
  6168. q[(rb) + 28 + 3 + 32] = m - t; \
  6169. } while (0)
  6170.  
  6171. #define FFT_LOOP_64_2(rb) do { \
  6172. s32 m = q[(rb)]; \
  6173. s32 n = q[(rb) + 64]; \
  6174. q[(rb)] = m + n; \
  6175. q[(rb) + 64] = m - n; \
  6176. s32 t; \
  6177. m = q[(rb) + 0 + 1]; \
  6178. n = q[(rb) + 0 + 1 + 64]; \
  6179. t = REDS2(n * alpha_tab[0 + 1 * 2]); \
  6180. q[(rb) + 0 + 1] = m + t; \
  6181. q[(rb) + 0 + 1 + 64] = m - t; \
  6182. m = q[(rb) + 0 + 2]; \
  6183. n = q[(rb) + 0 + 2 + 64]; \
  6184. t = REDS2(n * alpha_tab[0 + 2 * 2]); \
  6185. q[(rb) + 0 + 2] = m + t; \
  6186. q[(rb) + 0 + 2 + 64] = m - t; \
  6187. m = q[(rb) + 0 + 3]; \
  6188. n = q[(rb) + 0 + 3 + 64]; \
  6189. t = REDS2(n * alpha_tab[0 + 3 * 2]); \
  6190. q[(rb) + 0 + 3] = m + t; \
  6191. q[(rb) + 0 + 3 + 64] = m - t; \
  6192. \
  6193. m = q[(rb) + 4 + 0]; \
  6194. n = q[(rb) + 4 + 0 + 64]; \
  6195. t = REDS2(n * alpha_tab[8 + 0 * 2]); \
  6196. q[(rb) + 4 + 0] = m + t; \
  6197. q[(rb) + 4 + 0 + 64] = m - t; \
  6198. m = q[(rb) + 4 + 1]; \
  6199. n = q[(rb) + 4 + 1 + 64]; \
  6200. t = REDS2(n * alpha_tab[8 + 1 * 2]); \
  6201. q[(rb) + 4 + 1] = m + t; \
  6202. q[(rb) + 4 + 1 + 64] = m - t; \
  6203. m = q[(rb) + 4 + 2]; \
  6204. n = q[(rb) + 4 + 2 + 64]; \
  6205. t = REDS2(n * alpha_tab[8 + 2 * 2]); \
  6206. q[(rb) + 4 + 2] = m + t; \
  6207. q[(rb) + 4 + 2 + 64] = m - t; \
  6208. m = q[(rb) + 4 + 3]; \
  6209. n = q[(rb) + 4 + 3 + 64]; \
  6210. t = REDS2(n * alpha_tab[8 + 3 * 2]); \
  6211. q[(rb) + 4 + 3] = m + t; \
  6212. q[(rb) + 4 + 3 + 64] = m - t; \
  6213. \
  6214. m = q[(rb) + 8 + 0]; \
  6215. n = q[(rb) + 8 + 0 + 64]; \
  6216. t = REDS2(n * alpha_tab[16 + 0 * 2]); \
  6217. q[(rb) + 8 + 0] = m + t; \
  6218. q[(rb) + 8 + 0 + 64] = m - t; \
  6219. m = q[(rb) + 8 + 1]; \
  6220. n = q[(rb) + 8 + 1 + 64]; \
  6221. t = REDS2(n * alpha_tab[16 + 1 * 2]); \
  6222. q[(rb) + 8 + 1] = m + t; \
  6223. q[(rb) + 8 + 1 + 64] = m - t; \
  6224. m = q[(rb) + 8 + 2]; \
  6225. n = q[(rb) + 8 + 2 + 64]; \
  6226. t = REDS2(n * alpha_tab[16 + 2 * 2]); \
  6227. q[(rb) + 8 + 2] = m + t; \
  6228. q[(rb) + 8 + 2 + 64] = m - t; \
  6229. m = q[(rb) + 8 + 3]; \
  6230. n = q[(rb) + 8 + 3 + 64]; \
  6231. t = REDS2(n * alpha_tab[16 + 3 * 2]); \
  6232. q[(rb) + 8 + 3] = m + t; \
  6233. q[(rb) + 8 + 3 + 64] = m - t; \
  6234. \
  6235. m = q[(rb) + 12 + 0]; \
  6236. n = q[(rb) + 12 + 0 + 64]; \
  6237. t = REDS2(n * alpha_tab[24 + 0 * 2]); \
  6238. q[(rb) + 12 + 0] = m + t; \
  6239. q[(rb) + 12 + 0 + 64] = m - t; \
  6240. m = q[(rb) + 12 + 1]; \
  6241. n = q[(rb) + 12 + 1 + 64]; \
  6242. t = REDS2(n * alpha_tab[24 + 1 * 2]); \
  6243. q[(rb) + 12 + 1] = m + t; \
  6244. q[(rb) + 12 + 1 + 64] = m - t; \
  6245. m = q[(rb) + 12 + 2]; \
  6246. n = q[(rb) + 12 + 2 + 64]; \
  6247. t = REDS2(n * alpha_tab[24 + 2 * 2]); \
  6248. q[(rb) + 12 + 2] = m + t; \
  6249. q[(rb) + 12 + 2 + 64] = m - t; \
  6250. m = q[(rb) + 12 + 3]; \
  6251. n = q[(rb) + 12 + 3 + 64]; \
  6252. t = REDS2(n * alpha_tab[24 + 3 * 2]); \
  6253. q[(rb) + 12 + 3] = m + t; \
  6254. q[(rb) + 12 + 3 + 64] = m - t; \
  6255. \
  6256. m = q[(rb) + 16 + 0]; \
  6257. n = q[(rb) + 16 + 0 + 64]; \
  6258. t = REDS2(n * alpha_tab[32 + 0 * 2]); \
  6259. q[(rb) + 16 + 0] = m + t; \
  6260. q[(rb) + 16 + 0 + 64] = m - t; \
  6261. m = q[(rb) + 16 + 1]; \
  6262. n = q[(rb) + 16 + 1 + 64]; \
  6263. t = REDS2(n * alpha_tab[32 + 1 * 2]); \
  6264. q[(rb) + 16 + 1] = m + t; \
  6265. q[(rb) + 16 + 1 + 64] = m - t; \
  6266. m = q[(rb) + 16 + 2]; \
  6267. n = q[(rb) + 16 + 2 + 64]; \
  6268. t = REDS2(n * alpha_tab[32 + 2 * 2]); \
  6269. q[(rb) + 16 + 2] = m + t; \
  6270. q[(rb) + 16 + 2 + 64] = m - t; \
  6271. m = q[(rb) + 16 + 3]; \
  6272. n = q[(rb) + 16 + 3 + 64]; \
  6273. t = REDS2(n * alpha_tab[32 + 3 * 2]); \
  6274. q[(rb) + 16 + 3] = m + t; \
  6275. q[(rb) + 16 + 3 + 64] = m - t; \
  6276. \
  6277. m = q[(rb) + 20 + 0]; \
  6278. n = q[(rb) + 20 + 0 + 64]; \
  6279. t = REDS2(n * alpha_tab[40 + 0 * 2]); \
  6280. q[(rb) + 20 + 0] = m + t; \
  6281. q[(rb) + 20 + 0 + 64] = m - t; \
  6282. m = q[(rb) + 20 + 1]; \
  6283. n = q[(rb) + 20 + 1 + 64]; \
  6284. t = REDS2(n * alpha_tab[40 + 1 * 2]); \
  6285. q[(rb) + 20 + 1] = m + t; \
  6286. q[(rb) + 20 + 1 + 64] = m - t; \
  6287. m = q[(rb) + 20 + 2]; \
  6288. n = q[(rb) + 20 + 2 + 64]; \
  6289. t = REDS2(n * alpha_tab[40 + 2 * 2]); \
  6290. q[(rb) + 20 + 2] = m + t; \
  6291. q[(rb) + 20 + 2 + 64] = m - t; \
  6292. m = q[(rb) + 20 + 3]; \
  6293. n = q[(rb) + 20 + 3 + 64]; \
  6294. t = REDS2(n * alpha_tab[40 + 3 * 2]); \
  6295. q[(rb) + 20 + 3] = m + t; \
  6296. q[(rb) + 20 + 3 + 64] = m - t; \
  6297. \
  6298. m = q[(rb) + 24 + 0]; \
  6299. n = q[(rb) + 24 + 0 + 64]; \
  6300. t = REDS2(n * alpha_tab[48 + 0 * 2]); \
  6301. q[(rb) + 24 + 0] = m + t; \
  6302. q[(rb) + 24 + 0 + 64] = m - t; \
  6303. m = q[(rb) + 24 + 1]; \
  6304. n = q[(rb) + 24 + 1 + 64]; \
  6305. t = REDS2(n * alpha_tab[48 + 1 * 2]); \
  6306. q[(rb) + 24 + 1] = m + t; \
  6307. q[(rb) + 24 + 1 + 64] = m - t; \
  6308. m = q[(rb) + 24 + 2]; \
  6309. n = q[(rb) + 24 + 2 + 64]; \
  6310. t = REDS2(n * alpha_tab[48 + 2 * 2]); \
  6311. q[(rb) + 24 + 2] = m + t; \
  6312. q[(rb) + 24 + 2 + 64] = m - t; \
  6313. m = q[(rb) + 24 + 3]; \
  6314. n = q[(rb) + 24 + 3 + 64]; \
  6315. t = REDS2(n * alpha_tab[48 + 3 * 2]); \
  6316. q[(rb) + 24 + 3] = m + t; \
  6317. q[(rb) + 24 + 3 + 64] = m - t; \
  6318. \
  6319. m = q[(rb) + 28 + 0]; \
  6320. n = q[(rb) + 28 + 0 + 64]; \
  6321. t = REDS2(n * alpha_tab[56 + 0 * 2]); \
  6322. q[(rb) + 28 + 0] = m + t; \
  6323. q[(rb) + 28 + 0 + 64] = m - t; \
  6324. m = q[(rb) + 28 + 1]; \
  6325. n = q[(rb) + 28 + 1 + 64]; \
  6326. t = REDS2(n * alpha_tab[56 + 1 * 2]); \
  6327. q[(rb) + 28 + 1] = m + t; \
  6328. q[(rb) + 28 + 1 + 64] = m - t; \
  6329. m = q[(rb) + 28 + 2]; \
  6330. n = q[(rb) + 28 + 2 + 64]; \
  6331. t = REDS2(n * alpha_tab[56 + 2 * 2]); \
  6332. q[(rb) + 28 + 2] = m + t; \
  6333. q[(rb) + 28 + 2 + 64] = m - t; \
  6334. m = q[(rb) + 28 + 3]; \
  6335. n = q[(rb) + 28 + 3 + 64]; \
  6336. t = REDS2(n * alpha_tab[56 + 3 * 2]); \
  6337. q[(rb) + 28 + 3] = m + t; \
  6338. q[(rb) + 28 + 3 + 64] = m - t; \
  6339. \
  6340. m = q[(rb) + 32 + 0]; \
  6341. n = q[(rb) + 32 + 0 + 64]; \
  6342. t = REDS2(n * alpha_tab[64 + 0 * 2]); \
  6343. q[(rb) + 32 + 0] = m + t; \
  6344. q[(rb) + 32 + 0 + 64] = m - t; \
  6345. m = q[(rb) + 32 + 1]; \
  6346. n = q[(rb) + 32 + 1 + 64]; \
  6347. t = REDS2(n * alpha_tab[64 + 1 * 2]); \
  6348. q[(rb) + 32 + 1] = m + t; \
  6349. q[(rb) + 32 + 1 + 64] = m - t; \
  6350. m = q[(rb) + 32 + 2]; \
  6351. n = q[(rb) + 32 + 2 + 64]; \
  6352. t = REDS2(n * alpha_tab[64 + 2 * 2]); \
  6353. q[(rb) + 32 + 2] = m + t; \
  6354. q[(rb) + 32 + 2 + 64] = m - t; \
  6355. m = q[(rb) + 32 + 3]; \
  6356. n = q[(rb) + 32 + 3 + 64]; \
  6357. t = REDS2(n * alpha_tab[64 + 3 * 2]); \
  6358. q[(rb) + 32 + 3] = m + t; \
  6359. q[(rb) + 32 + 3 + 64] = m - t; \
  6360. \
  6361. m = q[(rb) + 36 + 0]; \
  6362. n = q[(rb) + 36 + 0 + 64]; \
  6363. t = REDS2(n * alpha_tab[72 + 0 * 2]); \
  6364. q[(rb) + 36 + 0] = m + t; \
  6365. q[(rb) + 36 + 0 + 64] = m - t; \
  6366. m = q[(rb) + 36 + 1]; \
  6367. n = q[(rb) + 36 + 1 + 64]; \
  6368. t = REDS2(n * alpha_tab[72 + 1 * 2]); \
  6369. q[(rb) + 36 + 1] = m + t; \
  6370. q[(rb) + 36 + 1 + 64] = m - t; \
  6371. m = q[(rb) + 36 + 2]; \
  6372. n = q[(rb) + 36 + 2 + 64]; \
  6373. t = REDS2(n * alpha_tab[72 + 2 * 2]); \
  6374. q[(rb) + 36 + 2] = m + t; \
  6375. q[(rb) + 36 + 2 + 64] = m - t; \
  6376. m = q[(rb) + 36 + 3]; \
  6377. n = q[(rb) + 36 + 3 + 64]; \
  6378. t = REDS2(n * alpha_tab[72 + 3 * 2]); \
  6379. q[(rb) + 36 + 3] = m + t; \
  6380. q[(rb) + 36 + 3 + 64] = m - t; \
  6381. \
  6382. m = q[(rb) + 40 + 0]; \
  6383. n = q[(rb) + 40 + 0 + 64]; \
  6384. t = REDS2(n * alpha_tab[80 + 0 * 2]); \
  6385. q[(rb) + 40 + 0] = m + t; \
  6386. q[(rb) + 40 + 0 + 64] = m - t; \
  6387. m = q[(rb) + 40 + 1]; \
  6388. n = q[(rb) + 40 + 1 + 64]; \
  6389. t = REDS2(n * alpha_tab[80 + 1 * 2]); \
  6390. q[(rb) + 40 + 1] = m + t; \
  6391. q[(rb) + 40 + 1 + 64] = m - t; \
  6392. m = q[(rb) + 40 + 2]; \
  6393. n = q[(rb) + 40 + 2 + 64]; \
  6394. t = REDS2(n * alpha_tab[80 + 2 * 2]); \
  6395. q[(rb) + 40 + 2] = m + t; \
  6396. q[(rb) + 40 + 2 + 64] = m - t; \
  6397. m = q[(rb) + 40 + 3]; \
  6398. n = q[(rb) + 40 + 3 + 64]; \
  6399. t = REDS2(n * alpha_tab[80 + 3 * 2]); \
  6400. q[(rb) + 40 + 3] = m + t; \
  6401. q[(rb) + 40 + 3 + 64] = m - t; \
  6402. \
  6403. m = q[(rb) + 44 + 0]; \
  6404. n = q[(rb) + 44 + 0 + 64]; \
  6405. t = REDS2(n * alpha_tab[88 + 0 * 2]); \
  6406. q[(rb) + 44 + 0] = m + t; \
  6407. q[(rb) + 44 + 0 + 64] = m - t; \
  6408. m = q[(rb) + 44 + 1]; \
  6409. n = q[(rb) + 44 + 1 + 64]; \
  6410. t = REDS2(n * alpha_tab[88 + 1 * 2]); \
  6411. q[(rb) + 44 + 1] = m + t; \
  6412. q[(rb) + 44 + 1 + 64] = m - t; \
  6413. m = q[(rb) + 44 + 2]; \
  6414. n = q[(rb) + 44 + 2 + 64]; \
  6415. t = REDS2(n * alpha_tab[88 + 2 * 2]); \
  6416. q[(rb) + 44 + 2] = m + t; \
  6417. q[(rb) + 44 + 2 + 64] = m - t; \
  6418. m = q[(rb) + 44 + 3]; \
  6419. n = q[(rb) + 44 + 3 + 64]; \
  6420. t = REDS2(n * alpha_tab[88 + 3 * 2]); \
  6421. q[(rb) + 44 + 3] = m + t; \
  6422. q[(rb) + 44 + 3 + 64] = m - t; \
  6423. \
  6424. m = q[(rb) + 48 + 0]; \
  6425. n = q[(rb) + 48 + 0 + 64]; \
  6426. t = REDS2(n * alpha_tab[96 + 0 * 2]); \
  6427. q[(rb) + 48 + 0] = m + t; \
  6428. q[(rb) + 48 + 0 + 64] = m - t; \
  6429. m = q[(rb) + 48 + 1]; \
  6430. n = q[(rb) + 48 + 1 + 64]; \
  6431. t = REDS2(n * alpha_tab[96 + 1 * 2]); \
  6432. q[(rb) + 48 + 1] = m + t; \
  6433. q[(rb) + 48 + 1 + 64] = m - t; \
  6434. m = q[(rb) + 48 + 2]; \
  6435. n = q[(rb) + 48 + 2 + 64]; \
  6436. t = REDS2(n * alpha_tab[96 + 2 * 2]); \
  6437. q[(rb) + 48 + 2] = m + t; \
  6438. q[(rb) + 48 + 2 + 64] = m - t; \
  6439. m = q[(rb) + 48 + 3]; \
  6440. n = q[(rb) + 48 + 3 + 64]; \
  6441. t = REDS2(n * alpha_tab[96 + 3 * 2]); \
  6442. q[(rb) + 48 + 3] = m + t; \
  6443. q[(rb) + 48 + 3 + 64] = m - t; \
  6444. \
  6445. m = q[(rb) + 52 + 0]; \
  6446. n = q[(rb) + 52 + 0 + 64]; \
  6447. t = REDS2(n * alpha_tab[104 + 0 * 2]); \
  6448. q[(rb) + 52 + 0] = m + t; \
  6449. q[(rb) + 52 + 0 + 64] = m - t; \
  6450. m = q[(rb) + 52 + 1]; \
  6451. n = q[(rb) + 52 + 1 + 64]; \
  6452. t = REDS2(n * alpha_tab[104 + 1 * 2]); \
  6453. q[(rb) + 52 + 1] = m + t; \
  6454. q[(rb) + 52 + 1 + 64] = m - t; \
  6455. m = q[(rb) + 52 + 2]; \
  6456. n = q[(rb) + 52 + 2 + 64]; \
  6457. t = REDS2(n * alpha_tab[104 + 2 * 2]); \
  6458. q[(rb) + 52 + 2] = m + t; \
  6459. q[(rb) + 52 + 2 + 64] = m - t; \
  6460. m = q[(rb) + 52 + 3]; \
  6461. n = q[(rb) + 52 + 3 + 64]; \
  6462. t = REDS2(n * alpha_tab[104 + 3 * 2]); \
  6463. q[(rb) + 52 + 3] = m + t; \
  6464. q[(rb) + 52 + 3 + 64] = m - t; \
  6465. \
  6466. m = q[(rb) + 56 + 0]; \
  6467. n = q[(rb) + 56 + 0 + 64]; \
  6468. t = REDS2(n * alpha_tab[112 + 0 * 2]); \
  6469. q[(rb) + 56 + 0] = m + t; \
  6470. q[(rb) + 56 + 0 + 64] = m - t; \
  6471. m = q[(rb) + 56 + 1]; \
  6472. n = q[(rb) + 56 + 1 + 64]; \
  6473. t = REDS2(n * alpha_tab[112 + 1 * 2]); \
  6474. q[(rb) + 56 + 1] = m + t; \
  6475. q[(rb) + 56 + 1 + 64] = m - t; \
  6476. m = q[(rb) + 56 + 2]; \
  6477. n = q[(rb) + 56 + 2 + 64]; \
  6478. t = REDS2(n * alpha_tab[112 + 2 * 2]); \
  6479. q[(rb) + 56 + 2] = m + t; \
  6480. q[(rb) + 56 + 2 + 64] = m - t; \
  6481. m = q[(rb) + 56 + 3]; \
  6482. n = q[(rb) + 56 + 3 + 64]; \
  6483. t = REDS2(n * alpha_tab[112 + 3 * 2]); \
  6484. q[(rb) + 56 + 3] = m + t; \
  6485. q[(rb) + 56 + 3 + 64] = m - t; \
  6486. \
  6487. m = q[(rb) + 60 + 0]; \
  6488. n = q[(rb) + 60 + 0 + 64]; \
  6489. t = REDS2(n * alpha_tab[120 + 0 * 2]); \
  6490. q[(rb) + 60 + 0] = m + t; \
  6491. q[(rb) + 60 + 0 + 64] = m - t; \
  6492. m = q[(rb) + 60 + 1]; \
  6493. n = q[(rb) + 60 + 1 + 64]; \
  6494. t = REDS2(n * alpha_tab[120 + 1 * 2]); \
  6495. q[(rb) + 60 + 1] = m + t; \
  6496. q[(rb) + 60 + 1 + 64] = m - t; \
  6497. m = q[(rb) + 60 + 2]; \
  6498. n = q[(rb) + 60 + 2 + 64]; \
  6499. t = REDS2(n * alpha_tab[120 + 2 * 2]); \
  6500. q[(rb) + 60 + 2] = m + t; \
  6501. q[(rb) + 60 + 2 + 64] = m - t; \
  6502. m = q[(rb) + 60 + 3]; \
  6503. n = q[(rb) + 60 + 3 + 64]; \
  6504. t = REDS2(n * alpha_tab[120 + 3 * 2]); \
  6505. q[(rb) + 60 + 3] = m + t; \
  6506. q[(rb) + 60 + 3 + 64] = m - t; \
  6507. } while (0)
  6508.  
  6509. #define FFT_LOOP_128_1(rb) do { \
  6510. s32 m = q[(rb)]; \
  6511. s32 n = q[(rb) + 128]; \
  6512. q[(rb)] = m + n; \
  6513. q[(rb) + 128] = m - n; \
  6514. s32 t; \
  6515. m = q[(rb) + 0 + 1]; \
  6516. n = q[(rb) + 0 + 1 + 128]; \
  6517. t = REDS2(n * alpha_tab[0 + 1 * 1]); \
  6518. q[(rb) + 0 + 1] = m + t; \
  6519. q[(rb) + 0 + 1 + 128] = m - t; \
  6520. m = q[(rb) + 0 + 2]; \
  6521. n = q[(rb) + 0 + 2 + 128]; \
  6522. t = REDS2(n * alpha_tab[0 + 2 * 1]); \
  6523. q[(rb) + 0 + 2] = m + t; \
  6524. q[(rb) + 0 + 2 + 128] = m - t; \
  6525. m = q[(rb) + 0 + 3]; \
  6526. n = q[(rb) + 0 + 3 + 128]; \
  6527. t = REDS2(n * alpha_tab[0 + 3 * 1]); \
  6528. q[(rb) + 0 + 3] = m + t; \
  6529. q[(rb) + 0 + 3 + 128] = m - t; \
  6530. m = q[(rb) + 4 + 0]; \
  6531. n = q[(rb) + 4 + 0 + 128]; \
  6532. t = REDS2(n * alpha_tab[4 + 0 * 1]); \
  6533. q[(rb) + 4 + 0] = m + t; \
  6534. q[(rb) + 4 + 0 + 128] = m - t; \
  6535. m = q[(rb) + 4 + 1]; \
  6536. n = q[(rb) + 4 + 1 + 128]; \
  6537. t = REDS2(n * alpha_tab[4 + 1 * 1]); \
  6538. q[(rb) + 4 + 1] = m + t; \
  6539. q[(rb) + 4 + 1 + 128] = m - t; \
  6540. m = q[(rb) + 4 + 2]; \
  6541. n = q[(rb) + 4 + 2 + 128]; \
  6542. t = REDS2(n * alpha_tab[4 + 2 * 1]); \
  6543. q[(rb) + 4 + 2] = m + t; \
  6544. q[(rb) + 4 + 2 + 128] = m - t; \
  6545. m = q[(rb) + 4 + 3]; \
  6546. n = q[(rb) + 4 + 3 + 128]; \
  6547. t = REDS2(n * alpha_tab[4 + 3 * 1]); \
  6548. q[(rb) + 4 + 3] = m + t; \
  6549. q[(rb) + 4 + 3 + 128] = m - t; \
  6550. m = q[(rb) + 8 + 0]; \
  6551. n = q[(rb) + 8 + 0 + 128]; \
  6552. t = REDS2(n * alpha_tab[8 + 0 * 1]); \
  6553. q[(rb) + 8 + 0] = m + t; \
  6554. q[(rb) + 8 + 0 + 128] = m - t; \
  6555. m = q[(rb) + 8 + 1]; \
  6556. n = q[(rb) + 8 + 1 + 128]; \
  6557. t = REDS2(n * alpha_tab[8 + 1 * 1]); \
  6558. q[(rb) + 8 + 1] = m + t; \
  6559. q[(rb) + 8 + 1 + 128] = m - t; \
  6560. m = q[(rb) + 8 + 2]; \
  6561. n = q[(rb) + 8 + 2 + 128]; \
  6562. t = REDS2(n * alpha_tab[8 + 2 * 1]); \
  6563. q[(rb) + 8 + 2] = m + t; \
  6564. q[(rb) + 8 + 2 + 128] = m - t; \
  6565. m = q[(rb) + 8 + 3]; \
  6566. n = q[(rb) + 8 + 3 + 128]; \
  6567. t = REDS2(n * alpha_tab[8 + 3 * 1]); \
  6568. q[(rb) + 8 + 3] = m + t; \
  6569. q[(rb) + 8 + 3 + 128] = m - t; \
  6570. m = q[(rb) + 12 + 0]; \
  6571. n = q[(rb) + 12 + 0 + 128]; \
  6572. t = REDS2(n * alpha_tab[12 + 0 * 1]); \
  6573. q[(rb) + 12 + 0] = m + t; \
  6574. q[(rb) + 12 + 0 + 128] = m - t; \
  6575. m = q[(rb) + 12 + 1]; \
  6576. n = q[(rb) + 12 + 1 + 128]; \
  6577. t = REDS2(n * alpha_tab[12 + 1 * 1]); \
  6578. q[(rb) + 12 + 1] = m + t; \
  6579. q[(rb) + 12 + 1 + 128] = m - t; \
  6580. m = q[(rb) + 12 + 2]; \
  6581. n = q[(rb) + 12 + 2 + 128]; \
  6582. t = REDS2(n * alpha_tab[12 + 2 * 1]); \
  6583. q[(rb) + 12 + 2] = m + t; \
  6584. q[(rb) + 12 + 2 + 128] = m - t; \
  6585. m = q[(rb) + 12 + 3]; \
  6586. n = q[(rb) + 12 + 3 + 128]; \
  6587. t = REDS2(n * alpha_tab[12 + 3 * 1]); \
  6588. q[(rb) + 12 + 3] = m + t; \
  6589. q[(rb) + 12 + 3 + 128] = m - t; \
  6590. m = q[(rb) + 16 + 0]; \
  6591. n = q[(rb) + 16 + 0 + 128]; \
  6592. t = REDS2(n * alpha_tab[16 + 0 * 1]); \
  6593. q[(rb) + 16 + 0] = m + t; \
  6594. q[(rb) + 16 + 0 + 128] = m - t; \
  6595. m = q[(rb) + 16 + 1]; \
  6596. n = q[(rb) + 16 + 1 + 128]; \
  6597. t = REDS2(n * alpha_tab[16 + 1 * 1]); \
  6598. q[(rb) + 16 + 1] = m + t; \
  6599. q[(rb) + 16 + 1 + 128] = m - t; \
  6600. m = q[(rb) + 16 + 2]; \
  6601. n = q[(rb) + 16 + 2 + 128]; \
  6602. t = REDS2(n * alpha_tab[16 + 2 * 1]); \
  6603. q[(rb) + 16 + 2] = m + t; \
  6604. q[(rb) + 16 + 2 + 128] = m - t; \
  6605. m = q[(rb) + 16 + 3]; \
  6606. n = q[(rb) + 16 + 3 + 128]; \
  6607. t = REDS2(n * alpha_tab[16 + 3 * 1]); \
  6608. q[(rb) + 16 + 3] = m + t; \
  6609. q[(rb) + 16 + 3 + 128] = m - t; \
  6610. m = q[(rb) + 20 + 0]; \
  6611. n = q[(rb) + 20 + 0 + 128]; \
  6612. t = REDS2(n * alpha_tab[20 + 0 * 1]); \
  6613. q[(rb) + 20 + 0] = m + t; \
  6614. q[(rb) + 20 + 0 + 128] = m - t; \
  6615. m = q[(rb) + 20 + 1]; \
  6616. n = q[(rb) + 20 + 1 + 128]; \
  6617. t = REDS2(n * alpha_tab[20 + 1 * 1]); \
  6618. q[(rb) + 20 + 1] = m + t; \
  6619. q[(rb) + 20 + 1 + 128] = m - t; \
  6620. m = q[(rb) + 20 + 2]; \
  6621. n = q[(rb) + 20 + 2 + 128]; \
  6622. t = REDS2(n * alpha_tab[20 + 2 * 1]); \
  6623. q[(rb) + 20 + 2] = m + t; \
  6624. q[(rb) + 20 + 2 + 128] = m - t; \
  6625. m = q[(rb) + 20 + 3]; \
  6626. n = q[(rb) + 20 + 3 + 128]; \
  6627. t = REDS2(n * alpha_tab[20 + 3 * 1]); \
  6628. q[(rb) + 20 + 3] = m + t; \
  6629. q[(rb) + 20 + 3 + 128] = m - t; \
  6630. m = q[(rb) + 24 + 0]; \
  6631. n = q[(rb) + 24 + 0 + 128]; \
  6632. t = REDS2(n * alpha_tab[24 + 0 * 1]); \
  6633. q[(rb) + 24 + 0] = m + t; \
  6634. q[(rb) + 24 + 0 + 128] = m - t; \
  6635. m = q[(rb) + 24 + 1]; \
  6636. n = q[(rb) + 24 + 1 + 128]; \
  6637. t = REDS2(n * alpha_tab[24 + 1 * 1]); \
  6638. q[(rb) + 24 + 1] = m + t; \
  6639. q[(rb) + 24 + 1 + 128] = m - t; \
  6640. m = q[(rb) + 24 + 2]; \
  6641. n = q[(rb) + 24 + 2 + 128]; \
  6642. t = REDS2(n * alpha_tab[24 + 2 * 1]); \
  6643. q[(rb) + 24 + 2] = m + t; \
  6644. q[(rb) + 24 + 2 + 128] = m - t; \
  6645. m = q[(rb) + 24 + 3]; \
  6646. n = q[(rb) + 24 + 3 + 128]; \
  6647. t = REDS2(n * alpha_tab[24 + 3 * 1]); \
  6648. q[(rb) + 24 + 3] = m + t; \
  6649. q[(rb) + 24 + 3 + 128] = m - t; \
  6650. m = q[(rb) + 28 + 0]; \
  6651. n = q[(rb) + 28 + 0 + 128]; \
  6652. t = REDS2(n * alpha_tab[28 + 0 * 1]); \
  6653. q[(rb) + 28 + 0] = m + t; \
  6654. q[(rb) + 28 + 0 + 128] = m - t; \
  6655. m = q[(rb) + 28 + 1]; \
  6656. n = q[(rb) + 28 + 1 + 128]; \
  6657. t = REDS2(n * alpha_tab[28 + 1 * 1]); \
  6658. q[(rb) + 28 + 1] = m + t; \
  6659. q[(rb) + 28 + 1 + 128] = m - t; \
  6660. m = q[(rb) + 28 + 2]; \
  6661. n = q[(rb) + 28 + 2 + 128]; \
  6662. t = REDS2(n * alpha_tab[28 + 2 * 1]); \
  6663. q[(rb) + 28 + 2] = m + t; \
  6664. q[(rb) + 28 + 2 + 128] = m - t; \
  6665. m = q[(rb) + 28 + 3]; \
  6666. n = q[(rb) + 28 + 3 + 128]; \
  6667. t = REDS2(n * alpha_tab[28 + 3 * 1]); \
  6668. q[(rb) + 28 + 3] = m + t; \
  6669. q[(rb) + 28 + 3 + 128] = m - t; \
  6670. m = q[(rb) + 32 + 0]; \
  6671. n = q[(rb) + 32 + 0 + 128]; \
  6672. t = REDS2(n * alpha_tab[32 + 0 * 1]); \
  6673. q[(rb) + 32 + 0] = m + t; \
  6674. q[(rb) + 32 + 0 + 128] = m - t; \
  6675. m = q[(rb) + 32 + 1]; \
  6676. n = q[(rb) + 32 + 1 + 128]; \
  6677. t = REDS2(n * alpha_tab[32 + 1 * 1]); \
  6678. q[(rb) + 32 + 1] = m + t; \
  6679. q[(rb) + 32 + 1 + 128] = m - t; \
  6680. m = q[(rb) + 32 + 2]; \
  6681. n = q[(rb) + 32 + 2 + 128]; \
  6682. t = REDS2(n * alpha_tab[32 + 2 * 1]); \
  6683. q[(rb) + 32 + 2] = m + t; \
  6684. q[(rb) + 32 + 2 + 128] = m - t; \
  6685. m = q[(rb) + 32 + 3]; \
  6686. n = q[(rb) + 32 + 3 + 128]; \
  6687. t = REDS2(n * alpha_tab[32 + 3 * 1]); \
  6688. q[(rb) + 32 + 3] = m + t; \
  6689. q[(rb) + 32 + 3 + 128] = m - t; \
  6690. m = q[(rb) + 36 + 0]; \
  6691. n = q[(rb) + 36 + 0 + 128]; \
  6692. t = REDS2(n * alpha_tab[36 + 0 * 1]); \
  6693. q[(rb) + 36 + 0] = m + t; \
  6694. q[(rb) + 36 + 0 + 128] = m - t; \
  6695. m = q[(rb) + 36 + 1]; \
  6696. n = q[(rb) + 36 + 1 + 128]; \
  6697. t = REDS2(n * alpha_tab[36 + 1 * 1]); \
  6698. q[(rb) + 36 + 1] = m + t; \
  6699. q[(rb) + 36 + 1 + 128] = m - t; \
  6700. m = q[(rb) + 36 + 2]; \
  6701. n = q[(rb) + 36 + 2 + 128]; \
  6702. t = REDS2(n * alpha_tab[36 + 2 * 1]); \
  6703. q[(rb) + 36 + 2] = m + t; \
  6704. q[(rb) + 36 + 2 + 128] = m - t; \
  6705. m = q[(rb) + 36 + 3]; \
  6706. n = q[(rb) + 36 + 3 + 128]; \
  6707. t = REDS2(n * alpha_tab[36 + 3 * 1]); \
  6708. q[(rb) + 36 + 3] = m + t; \
  6709. q[(rb) + 36 + 3 + 128] = m - t; \
  6710. m = q[(rb) + 40 + 0]; \
  6711. n = q[(rb) + 40 + 0 + 128]; \
  6712. t = REDS2(n * alpha_tab[40 + 0 * 1]); \
  6713. q[(rb) + 40 + 0] = m + t; \
  6714. q[(rb) + 40 + 0 + 128] = m - t; \
  6715. m = q[(rb) + 40 + 1]; \
  6716. n = q[(rb) + 40 + 1 + 128]; \
  6717. t = REDS2(n * alpha_tab[40 + 1 * 1]); \
  6718. q[(rb) + 40 + 1] = m + t; \
  6719. q[(rb) + 40 + 1 + 128] = m - t; \
  6720. m = q[(rb) + 40 + 2]; \
  6721. n = q[(rb) + 40 + 2 + 128]; \
  6722. t = REDS2(n * alpha_tab[40 + 2 * 1]); \
  6723. q[(rb) + 40 + 2] = m + t; \
  6724. q[(rb) + 40 + 2 + 128] = m - t; \
  6725. m = q[(rb) + 40 + 3]; \
  6726. n = q[(rb) + 40 + 3 + 128]; \
  6727. t = REDS2(n * alpha_tab[40 + 3 * 1]); \
  6728. q[(rb) + 40 + 3] = m + t; \
  6729. q[(rb) + 40 + 3 + 128] = m - t; \
  6730. m = q[(rb) + 44 + 0]; \
  6731. n = q[(rb) + 44 + 0 + 128]; \
  6732. t = REDS2(n * alpha_tab[44 + 0 * 1]); \
  6733. q[(rb) + 44 + 0] = m + t; \
  6734. q[(rb) + 44 + 0 + 128] = m - t; \
  6735. m = q[(rb) + 44 + 1]; \
  6736. n = q[(rb) + 44 + 1 + 128]; \
  6737. t = REDS2(n * alpha_tab[44 + 1 * 1]); \
  6738. q[(rb) + 44 + 1] = m + t; \
  6739. q[(rb) + 44 + 1 + 128] = m - t; \
  6740. m = q[(rb) + 44 + 2]; \
  6741. n = q[(rb) + 44 + 2 + 128]; \
  6742. t = REDS2(n * alpha_tab[44 + 2 * 1]); \
  6743. q[(rb) + 44 + 2] = m + t; \
  6744. q[(rb) + 44 + 2 + 128] = m - t; \
  6745. m = q[(rb) + 44 + 3]; \
  6746. n = q[(rb) + 44 + 3 + 128]; \
  6747. t = REDS2(n * alpha_tab[44 + 3 * 1]); \
  6748. q[(rb) + 44 + 3] = m + t; \
  6749. q[(rb) + 44 + 3 + 128] = m - t; \
  6750. m = q[(rb) + 48 + 0]; \
  6751. n = q[(rb) + 48 + 0 + 128]; \
  6752. t = REDS2(n * alpha_tab[48 + 0 * 1]); \
  6753. q[(rb) + 48 + 0] = m + t; \
  6754. q[(rb) + 48 + 0 + 128] = m - t; \
  6755. m = q[(rb) + 48 + 1]; \
  6756. n = q[(rb) + 48 + 1 + 128]; \
  6757. t = REDS2(n * alpha_tab[48 + 1 * 1]); \
  6758. q[(rb) + 48 + 1] = m + t; \
  6759. q[(rb) + 48 + 1 + 128] = m - t; \
  6760. m = q[(rb) + 48 + 2]; \
  6761. n = q[(rb) + 48 + 2 + 128]; \
  6762. t = REDS2(n * alpha_tab[48 + 2 * 1]); \
  6763. q[(rb) + 48 + 2] = m + t; \
  6764. q[(rb) + 48 + 2 + 128] = m - t; \
  6765. m = q[(rb) + 48 + 3]; \
  6766. n = q[(rb) + 48 + 3 + 128]; \
  6767. t = REDS2(n * alpha_tab[48 + 3 * 1]); \
  6768. q[(rb) + 48 + 3] = m + t; \
  6769. q[(rb) + 48 + 3 + 128] = m - t; \
  6770. m = q[(rb) + 52 + 0]; \
  6771. n = q[(rb) + 52 + 0 + 128]; \
  6772. t = REDS2(n * alpha_tab[52 + 0 * 1]); \
  6773. q[(rb) + 52 + 0] = m + t; \
  6774. q[(rb) + 52 + 0 + 128] = m - t; \
  6775. m = q[(rb) + 52 + 1]; \
  6776. n = q[(rb) + 52 + 1 + 128]; \
  6777. t = REDS2(n * alpha_tab[52 + 1 * 1]); \
  6778. q[(rb) + 52 + 1] = m + t; \
  6779. q[(rb) + 52 + 1 + 128] = m - t; \
  6780. m = q[(rb) + 52 + 2]; \
  6781. n = q[(rb) + 52 + 2 + 128]; \
  6782. t = REDS2(n * alpha_tab[52 + 2 * 1]); \
  6783. q[(rb) + 52 + 2] = m + t; \
  6784. q[(rb) + 52 + 2 + 128] = m - t; \
  6785. m = q[(rb) + 52 + 3]; \
  6786. n = q[(rb) + 52 + 3 + 128]; \
  6787. t = REDS2(n * alpha_tab[52 + 3 * 1]); \
  6788. q[(rb) + 52 + 3] = m + t; \
  6789. q[(rb) + 52 + 3 + 128] = m - t; \
  6790. m = q[(rb) + 56 + 0]; \
  6791. n = q[(rb) + 56 + 0 + 128]; \
  6792. t = REDS2(n * alpha_tab[56 + 0 * 1]); \
  6793. q[(rb) + 56 + 0] = m + t; \
  6794. q[(rb) + 56 + 0 + 128] = m - t; \
  6795. m = q[(rb) + 56 + 1]; \
  6796. n = q[(rb) + 56 + 1 + 128]; \
  6797. t = REDS2(n * alpha_tab[56 + 1 * 1]); \
  6798. q[(rb) + 56 + 1] = m + t; \
  6799. q[(rb) + 56 + 1 + 128] = m - t; \
  6800. m = q[(rb) + 56 + 2]; \
  6801. n = q[(rb) + 56 + 2 + 128]; \
  6802. t = REDS2(n * alpha_tab[56 + 2 * 1]); \
  6803. q[(rb) + 56 + 2] = m + t; \
  6804. q[(rb) + 56 + 2 + 128] = m - t; \
  6805. m = q[(rb) + 56 + 3]; \
  6806. n = q[(rb) + 56 + 3 + 128]; \
  6807. t = REDS2(n * alpha_tab[56 + 3 * 1]); \
  6808. q[(rb) + 56 + 3] = m + t; \
  6809. q[(rb) + 56 + 3 + 128] = m - t; \
  6810. m = q[(rb) + 60 + 0]; \
  6811. n = q[(rb) + 60 + 0 + 128]; \
  6812. t = REDS2(n * alpha_tab[60 + 0 * 1]); \
  6813. q[(rb) + 60 + 0] = m + t; \
  6814. q[(rb) + 60 + 0 + 128] = m - t; \
  6815. m = q[(rb) + 60 + 1]; \
  6816. n = q[(rb) + 60 + 1 + 128]; \
  6817. t = REDS2(n * alpha_tab[60 + 1 * 1]); \
  6818. q[(rb) + 60 + 1] = m + t; \
  6819. q[(rb) + 60 + 1 + 128] = m - t; \
  6820. m = q[(rb) + 60 + 2]; \
  6821. n = q[(rb) + 60 + 2 + 128]; \
  6822. t = REDS2(n * alpha_tab[60 + 2 * 1]); \
  6823. q[(rb) + 60 + 2] = m + t; \
  6824. q[(rb) + 60 + 2 + 128] = m - t; \
  6825. m = q[(rb) + 60 + 3]; \
  6826. n = q[(rb) + 60 + 3 + 128]; \
  6827. t = REDS2(n * alpha_tab[60 + 3 * 1]); \
  6828. q[(rb) + 60 + 3] = m + t; \
  6829. q[(rb) + 60 + 3 + 128] = m - t; \
  6830. m = q[(rb) + 64 + 0]; \
  6831. n = q[(rb) + 64 + 0 + 128]; \
  6832. t = REDS2(n * alpha_tab[64 + 0 * 1]); \
  6833. q[(rb) + 64 + 0] = m + t; \
  6834. q[(rb) + 64 + 0 + 128] = m - t; \
  6835. m = q[(rb) + 64 + 1]; \
  6836. n = q[(rb) + 64 + 1 + 128]; \
  6837. t = REDS2(n * alpha_tab[64 + 1 * 1]); \
  6838. q[(rb) + 64 + 1] = m + t; \
  6839. q[(rb) + 64 + 1 + 128] = m - t; \
  6840. m = q[(rb) + 64 + 2]; \
  6841. n = q[(rb) + 64 + 2 + 128]; \
  6842. t = REDS2(n * alpha_tab[64 + 2 * 1]); \
  6843. q[(rb) + 64 + 2] = m + t; \
  6844. q[(rb) + 64 + 2 + 128] = m - t; \
  6845. m = q[(rb) + 64 + 3]; \
  6846. n = q[(rb) + 64 + 3 + 128]; \
  6847. t = REDS2(n * alpha_tab[64 + 3 * 1]); \
  6848. q[(rb) + 64 + 3] = m + t; \
  6849. q[(rb) + 64 + 3 + 128] = m - t; \
  6850. m = q[(rb) + 68 + 0]; \
  6851. n = q[(rb) + 68 + 0 + 128]; \
  6852. t = REDS2(n * alpha_tab[68 + 0 * 1]); \
  6853. q[(rb) + 68 + 0] = m + t; \
  6854. q[(rb) + 68 + 0 + 128] = m - t; \
  6855. m = q[(rb) + 68 + 1]; \
  6856. n = q[(rb) + 68 + 1 + 128]; \
  6857. t = REDS2(n * alpha_tab[68 + 1 * 1]); \
  6858. q[(rb) + 68 + 1] = m + t; \
  6859. q[(rb) + 68 + 1 + 128] = m - t; \
  6860. m = q[(rb) + 68 + 2]; \
  6861. n = q[(rb) + 68 + 2 + 128]; \
  6862. t = REDS2(n * alpha_tab[68 + 2 * 1]); \
  6863. q[(rb) + 68 + 2] = m + t; \
  6864. q[(rb) + 68 + 2 + 128] = m - t; \
  6865. m = q[(rb) + 68 + 3]; \
  6866. n = q[(rb) + 68 + 3 + 128]; \
  6867. t = REDS2(n * alpha_tab[68 + 3 * 1]); \
  6868. q[(rb) + 68 + 3] = m + t; \
  6869. q[(rb) + 68 + 3 + 128] = m - t; \
  6870. m = q[(rb) + 72 + 0]; \
  6871. n = q[(rb) + 72 + 0 + 128]; \
  6872. t = REDS2(n * alpha_tab[72 + 0 * 1]); \
  6873. q[(rb) + 72 + 0] = m + t; \
  6874. q[(rb) + 72 + 0 + 128] = m - t; \
  6875. m = q[(rb) + 72 + 1]; \
  6876. n = q[(rb) + 72 + 1 + 128]; \
  6877. t = REDS2(n * alpha_tab[72 + 1 * 1]); \
  6878. q[(rb) + 72 + 1] = m + t; \
  6879. q[(rb) + 72 + 1 + 128] = m - t; \
  6880. m = q[(rb) + 72 + 2]; \
  6881. n = q[(rb) + 72 + 2 + 128]; \
  6882. t = REDS2(n * alpha_tab[72 + 2 * 1]); \
  6883. q[(rb) + 72 + 2] = m + t; \
  6884. q[(rb) + 72 + 2 + 128] = m - t; \
  6885. m = q[(rb) + 72 + 3]; \
  6886. n = q[(rb) + 72 + 3 + 128]; \
  6887. t = REDS2(n * alpha_tab[72 + 3 * 1]); \
  6888. q[(rb) + 72 + 3] = m + t; \
  6889. q[(rb) + 72 + 3 + 128] = m - t; \
  6890. m = q[(rb) + 76 + 0]; \
  6891. n = q[(rb) + 76 + 0 + 128]; \
  6892. t = REDS2(n * alpha_tab[76 + 0 * 1]); \
  6893. q[(rb) + 76 + 0] = m + t; \
  6894. q[(rb) + 76 + 0 + 128] = m - t; \
  6895. m = q[(rb) + 76 + 1]; \
  6896. n = q[(rb) + 76 + 1 + 128]; \
  6897. t = REDS2(n * alpha_tab[76 + 1 * 1]); \
  6898. q[(rb) + 76 + 1] = m + t; \
  6899. q[(rb) + 76 + 1 + 128] = m - t; \
  6900. m = q[(rb) + 76 + 2]; \
  6901. n = q[(rb) + 76 + 2 + 128]; \
  6902. t = REDS2(n * alpha_tab[76 + 2 * 1]); \
  6903. q[(rb) + 76 + 2] = m + t; \
  6904. q[(rb) + 76 + 2 + 128] = m - t; \
  6905. m = q[(rb) + 76 + 3]; \
  6906. n = q[(rb) + 76 + 3 + 128]; \
  6907. t = REDS2(n * alpha_tab[76 + 3 * 1]); \
  6908. q[(rb) + 76 + 3] = m + t; \
  6909. q[(rb) + 76 + 3 + 128] = m - t; \
  6910. m = q[(rb) + 80 + 0]; \
  6911. n = q[(rb) + 80 + 0 + 128]; \
  6912. t = REDS2(n * alpha_tab[80 + 0 * 1]); \
  6913. q[(rb) + 80 + 0] = m + t; \
  6914. q[(rb) + 80 + 0 + 128] = m - t; \
  6915. m = q[(rb) + 80 + 1]; \
  6916. n = q[(rb) + 80 + 1 + 128]; \
  6917. t = REDS2(n * alpha_tab[80 + 1 * 1]); \
  6918. q[(rb) + 80 + 1] = m + t; \
  6919. q[(rb) + 80 + 1 + 128] = m - t; \
  6920. m = q[(rb) + 80 + 2]; \
  6921. n = q[(rb) + 80 + 2 + 128]; \
  6922. t = REDS2(n * alpha_tab[80 + 2 * 1]); \
  6923. q[(rb) + 80 + 2] = m + t; \
  6924. q[(rb) + 80 + 2 + 128] = m - t; \
  6925. m = q[(rb) + 80 + 3]; \
  6926. n = q[(rb) + 80 + 3 + 128]; \
  6927. t = REDS2(n * alpha_tab[80 + 3 * 1]); \
  6928. q[(rb) + 80 + 3] = m + t; \
  6929. q[(rb) + 80 + 3 + 128] = m - t; \
  6930. m = q[(rb) + 84 + 0]; \
  6931. n = q[(rb) + 84 + 0 + 128]; \
  6932. t = REDS2(n * alpha_tab[84 + 0 * 1]); \
  6933. q[(rb) + 84 + 0] = m + t; \
  6934. q[(rb) + 84 + 0 + 128] = m - t; \
  6935. m = q[(rb) + 84 + 1]; \
  6936. n = q[(rb) + 84 + 1 + 128]; \
  6937. t = REDS2(n * alpha_tab[84 + 1 * 1]); \
  6938. q[(rb) + 84 + 1] = m + t; \
  6939. q[(rb) + 84 + 1 + 128] = m - t; \
  6940. m = q[(rb) + 84 + 2]; \
  6941. n = q[(rb) + 84 + 2 + 128]; \
  6942. t = REDS2(n * alpha_tab[84 + 2 * 1]); \
  6943. q[(rb) + 84 + 2] = m + t; \
  6944. q[(rb) + 84 + 2 + 128] = m - t; \
  6945. m = q[(rb) + 84 + 3]; \
  6946. n = q[(rb) + 84 + 3 + 128]; \
  6947. t = REDS2(n * alpha_tab[84 + 3 * 1]); \
  6948. q[(rb) + 84 + 3] = m + t; \
  6949. q[(rb) + 84 + 3 + 128] = m - t; \
  6950. m = q[(rb) + 88 + 0]; \
  6951. n = q[(rb) + 88 + 0 + 128]; \
  6952. t = REDS2(n * alpha_tab[88 + 0 * 1]); \
  6953. q[(rb) + 88 + 0] = m + t; \
  6954. q[(rb) + 88 + 0 + 128] = m - t; \
  6955. m = q[(rb) + 88 + 1]; \
  6956. n = q[(rb) + 88 + 1 + 128]; \
  6957. t = REDS2(n * alpha_tab[88 + 1 * 1]); \
  6958. q[(rb) + 88 + 1] = m + t; \
  6959. q[(rb) + 88 + 1 + 128] = m - t; \
  6960. m = q[(rb) + 88 + 2]; \
  6961. n = q[(rb) + 88 + 2 + 128]; \
  6962. t = REDS2(n * alpha_tab[88 + 2 * 1]); \
  6963. q[(rb) + 88 + 2] = m + t; \
  6964. q[(rb) + 88 + 2 + 128] = m - t; \
  6965. m = q[(rb) + 88 + 3]; \
  6966. n = q[(rb) + 88 + 3 + 128]; \
  6967. t = REDS2(n * alpha_tab[88 + 3 * 1]); \
  6968. q[(rb) + 88 + 3] = m + t; \
  6969. q[(rb) + 88 + 3 + 128] = m - t; \
  6970. m = q[(rb) + 92 + 0]; \
  6971. n = q[(rb) + 92 + 0 + 128]; \
  6972. t = REDS2(n * alpha_tab[92 + 0 * 1]); \
  6973. q[(rb) + 92 + 0] = m + t; \
  6974. q[(rb) + 92 + 0 + 128] = m - t; \
  6975. m = q[(rb) + 92 + 1]; \
  6976. n = q[(rb) + 92 + 1 + 128]; \
  6977. t = REDS2(n * alpha_tab[92 + 1 * 1]); \
  6978. q[(rb) + 92 + 1] = m + t; \
  6979. q[(rb) + 92 + 1 + 128] = m - t; \
  6980. m = q[(rb) + 92 + 2]; \
  6981. n = q[(rb) + 92 + 2 + 128]; \
  6982. t = REDS2(n * alpha_tab[92 + 2 * 1]); \
  6983. q[(rb) + 92 + 2] = m + t; \
  6984. q[(rb) + 92 + 2 + 128] = m - t; \
  6985. m = q[(rb) + 92 + 3]; \
  6986. n = q[(rb) + 92 + 3 + 128]; \
  6987. t = REDS2(n * alpha_tab[92 + 3 * 1]); \
  6988. q[(rb) + 92 + 3] = m + t; \
  6989. q[(rb) + 92 + 3 + 128] = m - t; \
  6990. m = q[(rb) + 96 + 0]; \
  6991. n = q[(rb) + 96 + 0 + 128]; \
  6992. t = REDS2(n * alpha_tab[96 + 0 * 1]); \
  6993. q[(rb) + 96 + 0] = m + t; \
  6994. q[(rb) + 96 + 0 + 128] = m - t; \
  6995. m = q[(rb) + 96 + 1]; \
  6996. n = q[(rb) + 96 + 1 + 128]; \
  6997. t = REDS2(n * alpha_tab[96 + 1 * 1]); \
  6998. q[(rb) + 96 + 1] = m + t; \
  6999. q[(rb) + 96 + 1 + 128] = m - t; \
  7000. m = q[(rb) + 96 + 2]; \
  7001. n = q[(rb) + 96 + 2 + 128]; \
  7002. t = REDS2(n * alpha_tab[96 + 2 * 1]); \
  7003. q[(rb) + 96 + 2] = m + t; \
  7004. q[(rb) + 96 + 2 + 128] = m - t; \
  7005. m = q[(rb) + 96 + 3]; \
  7006. n = q[(rb) + 96 + 3 + 128]; \
  7007. t = REDS2(n * alpha_tab[96 + 3 * 1]); \
  7008. q[(rb) + 96 + 3] = m + t; \
  7009. q[(rb) + 96 + 3 + 128] = m - t; \
  7010. m = q[(rb) + 100 + 0]; \
  7011. n = q[(rb) + 100 + 0 + 128]; \
  7012. t = REDS2(n * alpha_tab[100 + 0 * 1]); \
  7013. q[(rb) + 100 + 0] = m + t; \
  7014. q[(rb) + 100 + 0 + 128] = m - t; \
  7015. m = q[(rb) + 100 + 1]; \
  7016. n = q[(rb) + 100 + 1 + 128]; \
  7017. t = REDS2(n * alpha_tab[100 + 1 * 1]); \
  7018. q[(rb) + 100 + 1] = m + t; \
  7019. q[(rb) + 100 + 1 + 128] = m - t; \
  7020. m = q[(rb) + 100 + 2]; \
  7021. n = q[(rb) + 100 + 2 + 128]; \
  7022. t = REDS2(n * alpha_tab[100 + 2 * 1]); \
  7023. q[(rb) + 100 + 2] = m + t; \
  7024. q[(rb) + 100 + 2 + 128] = m - t; \
  7025. m = q[(rb) + 100 + 3]; \
  7026. n = q[(rb) + 100 + 3 + 128]; \
  7027. t = REDS2(n * alpha_tab[100 + 3 * 1]); \
  7028. q[(rb) + 100 + 3] = m + t; \
  7029. q[(rb) + 100 + 3 + 128] = m - t; \
  7030. m = q[(rb) + 104 + 0]; \
  7031. n = q[(rb) + 104 + 0 + 128]; \
  7032. t = REDS2(n * alpha_tab[104 + 0 * 1]); \
  7033. q[(rb) + 104 + 0] = m + t; \
  7034. q[(rb) + 104 + 0 + 128] = m - t; \
  7035. m = q[(rb) + 104 + 1]; \
  7036. n = q[(rb) + 104 + 1 + 128]; \
  7037. t = REDS2(n * alpha_tab[104 + 1 * 1]); \
  7038. q[(rb) + 104 + 1] = m + t; \
  7039. q[(rb) + 104 + 1 + 128] = m - t; \
  7040. m = q[(rb) + 104 + 2]; \
  7041. n = q[(rb) + 104 + 2 + 128]; \
  7042. t = REDS2(n * alpha_tab[104 + 2 * 1]); \
  7043. q[(rb) + 104 + 2] = m + t; \
  7044. q[(rb) + 104 + 2 + 128] = m - t; \
  7045. m = q[(rb) + 104 + 3]; \
  7046. n = q[(rb) + 104 + 3 + 128]; \
  7047. t = REDS2(n * alpha_tab[104 + 3 * 1]); \
  7048. q[(rb) + 104 + 3] = m + t; \
  7049. q[(rb) + 104 + 3 + 128] = m - t; \
  7050. m = q[(rb) + 108 + 0]; \
  7051. n = q[(rb) + 108 + 0 + 128]; \
  7052. t = REDS2(n * alpha_tab[108 + 0 * 1]); \
  7053. q[(rb) + 108 + 0] = m + t; \
  7054. q[(rb) + 108 + 0 + 128] = m - t; \
  7055. m = q[(rb) + 108 + 1]; \
  7056. n = q[(rb) + 108 + 1 + 128]; \
  7057. t = REDS2(n * alpha_tab[108 + 1 * 1]); \
  7058. q[(rb) + 108 + 1] = m + t; \
  7059. q[(rb) + 108 + 1 + 128] = m - t; \
  7060. m = q[(rb) + 108 + 2]; \
  7061. n = q[(rb) + 108 + 2 + 128]; \
  7062. t = REDS2(n * alpha_tab[108 + 2 * 1]); \
  7063. q[(rb) + 108 + 2] = m + t; \
  7064. q[(rb) + 108 + 2 + 128] = m - t; \
  7065. m = q[(rb) + 108 + 3]; \
  7066. n = q[(rb) + 108 + 3 + 128]; \
  7067. t = REDS2(n * alpha_tab[108 + 3 * 1]); \
  7068. q[(rb) + 108 + 3] = m + t; \
  7069. q[(rb) + 108 + 3 + 128] = m - t; \
  7070. m = q[(rb) + 112 + 0]; \
  7071. n = q[(rb) + 112 + 0 + 128]; \
  7072. t = REDS2(n * alpha_tab[112 + 0 * 1]); \
  7073. q[(rb) + 112 + 0] = m + t; \
  7074. q[(rb) + 112 + 0 + 128] = m - t; \
  7075. m = q[(rb) + 112 + 1]; \
  7076. n = q[(rb) + 112 + 1 + 128]; \
  7077. t = REDS2(n * alpha_tab[112 + 1 * 1]); \
  7078. q[(rb) + 112 + 1] = m + t; \
  7079. q[(rb) + 112 + 1 + 128] = m - t; \
  7080. m = q[(rb) + 112 + 2]; \
  7081. n = q[(rb) + 112 + 2 + 128]; \
  7082. t = REDS2(n * alpha_tab[112 + 2 * 1]); \
  7083. q[(rb) + 112 + 2] = m + t; \
  7084. q[(rb) + 112 + 2 + 128] = m - t; \
  7085. m = q[(rb) + 112 + 3]; \
  7086. n = q[(rb) + 112 + 3 + 128]; \
  7087. t = REDS2(n * alpha_tab[112 + 3 * 1]); \
  7088. q[(rb) + 112 + 3] = m + t; \
  7089. q[(rb) + 112 + 3 + 128] = m - t; \
  7090. m = q[(rb) + 116 + 0]; \
  7091. n = q[(rb) + 116 + 0 + 128]; \
  7092. t = REDS2(n * alpha_tab[116 + 0 * 1]); \
  7093. q[(rb) + 116 + 0] = m + t; \
  7094. q[(rb) + 116 + 0 + 128] = m - t; \
  7095. m = q[(rb) + 116 + 1]; \
  7096. n = q[(rb) + 116 + 1 + 128]; \
  7097. t = REDS2(n * alpha_tab[116 + 1 * 1]); \
  7098. q[(rb) + 116 + 1] = m + t; \
  7099. q[(rb) + 116 + 1 + 128] = m - t; \
  7100. m = q[(rb) + 116 + 2]; \
  7101. n = q[(rb) + 116 + 2 + 128]; \
  7102. t = REDS2(n * alpha_tab[116 + 2 * 1]); \
  7103. q[(rb) + 116 + 2] = m + t; \
  7104. q[(rb) + 116 + 2 + 128] = m - t; \
  7105. m = q[(rb) + 116 + 3]; \
  7106. n = q[(rb) + 116 + 3 + 128]; \
  7107. t = REDS2(n * alpha_tab[116 + 3 * 1]); \
  7108. q[(rb) + 116 + 3] = m + t; \
  7109. q[(rb) + 116 + 3 + 128] = m - t; \
  7110. m = q[(rb) + 120 + 0]; \
  7111. n = q[(rb) + 120 + 0 + 128]; \
  7112. t = REDS2(n * alpha_tab[120 + 0 * 1]); \
  7113. q[(rb) + 120 + 0] = m + t; \
  7114. q[(rb) + 120 + 0 + 128] = m - t; \
  7115. m = q[(rb) + 120 + 1]; \
  7116. n = q[(rb) + 120 + 1 + 128]; \
  7117. t = REDS2(n * alpha_tab[120 + 1 * 1]); \
  7118. q[(rb) + 120 + 1] = m + t; \
  7119. q[(rb) + 120 + 1 + 128] = m - t; \
  7120. m = q[(rb) + 120 + 2]; \
  7121. n = q[(rb) + 120 + 2 + 128]; \
  7122. t = REDS2(n * alpha_tab[120 + 2 * 1]); \
  7123. q[(rb) + 120 + 2] = m + t; \
  7124. q[(rb) + 120 + 2 + 128] = m - t; \
  7125. m = q[(rb) + 120 + 3]; \
  7126. n = q[(rb) + 120 + 3 + 128]; \
  7127. t = REDS2(n * alpha_tab[120 + 3 * 1]); \
  7128. q[(rb) + 120 + 3] = m + t; \
  7129. q[(rb) + 120 + 3 + 128] = m - t; \
  7130. m = q[(rb) + 124 + 0]; \
  7131. n = q[(rb) + 124 + 0 + 128]; \
  7132. t = REDS2(n * alpha_tab[124 + 0 * 1]); \
  7133. q[(rb) + 124 + 0] = m + t; \
  7134. q[(rb) + 124 + 0 + 128] = m - t; \
  7135. m = q[(rb) + 124 + 1]; \
  7136. n = q[(rb) + 124 + 1 + 128]; \
  7137. t = REDS2(n * alpha_tab[124 + 1 * 1]); \
  7138. q[(rb) + 124 + 1] = m + t; \
  7139. q[(rb) + 124 + 1 + 128] = m - t; \
  7140. m = q[(rb) + 124 + 2]; \
  7141. n = q[(rb) + 124 + 2 + 128]; \
  7142. t = REDS2(n * alpha_tab[124 + 2 * 1]); \
  7143. q[(rb) + 124 + 2] = m + t; \
  7144. q[(rb) + 124 + 2 + 128] = m - t; \
  7145. m = q[(rb) + 124 + 3]; \
  7146. n = q[(rb) + 124 + 3 + 128]; \
  7147. t = REDS2(n * alpha_tab[124 + 3 * 1]); \
  7148. q[(rb) + 124 + 3] = m + t; \
  7149. q[(rb) + 124 + 3 + 128] = m - t; \
  7150. } while (0)
  7151.  
  7152. /*
  7153. * Output ranges:
  7154. * d0: min= 0 max= 1020
  7155. * d1: min= -67 max= 4587
  7156. * d2: min=-4335 max= 4335
  7157. * d3: min=-4147 max= 507
  7158. * d4: min= -510 max= 510
  7159. * d5: min= -252 max= 4402
  7160. * d6: min=-4335 max= 4335
  7161. * d7: min=-4332 max= 322
  7162. */
  7163. #define FFT8(xb, xs, d) do { \
  7164. s32 x0 = x[(xb)]; \
  7165. s32 x1 = x[(xb) + (xs)]; \
  7166. s32 x2 = x[(xb) + 2 * (xs)]; \
  7167. s32 x3 = x[(xb) + 3 * (xs)]; \
  7168. s32 a0 = x0 + x2; \
  7169. s32 a1 = x0 + (x2 << 4); \
  7170. s32 a2 = x0 - x2; \
  7171. s32 a3 = x0 - (x2 << 4); \
  7172. s32 b0 = x1 + x3; \
  7173. s32 b1 = REDS1((x1 << 2) + (x3 << 6)); \
  7174. s32 b2 = (x1 << 4) - (x3 << 4); \
  7175. s32 b3 = REDS1((x1 << 6) + (x3 << 2)); \
  7176. d ## 0 = a0 + b0; \
  7177. d ## 1 = a1 + b1; \
  7178. d ## 2 = a2 + b2; \
  7179. d ## 3 = a3 + b3; \
  7180. d ## 4 = a0 - b0; \
  7181. d ## 5 = a1 - b1; \
  7182. d ## 6 = a2 - b2; \
  7183. d ## 7 = a3 - b3; \
  7184. } while (0)
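/*
 * Note on FFT8 above: it is a butterfly over Z/257 that produces eight
 * outputs from four inputs. All twiddle factors are powers of two
 * (2^16 == 1 mod 257, so the shifts by 2, 4 and 6 bits implement the
 * multiplications), and REDS1 (defined earlier in this file) performs the
 * partial mod-257 reduction.
 */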
  7185.  
  7186. /*
  7187. * When k=16, we have alpha=2. Multiplication by alpha^i is then reduced
  7188. * to some shifting.
  7189. *
  7190. * Output: within -591471..591723
  7191. */
  7192. #define FFT16(xb, xs, rb) do { \
  7193. s32 d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7; \
  7194. s32 d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7; \
  7195. FFT8(xb, (xs) << 1, d1_); \
  7196. FFT8((xb) + (xs), (xs) << 1, d2_); \
  7197. q[(rb) + 0] = d1_0 + d2_0; \
  7198. q[(rb) + 1] = d1_1 + (d2_1 << 1); \
  7199. q[(rb) + 2] = d1_2 + (d2_2 << 2); \
  7200. q[(rb) + 3] = d1_3 + (d2_3 << 3); \
  7201. q[(rb) + 4] = d1_4 + (d2_4 << 4); \
  7202. q[(rb) + 5] = d1_5 + (d2_5 << 5); \
  7203. q[(rb) + 6] = d1_6 + (d2_6 << 6); \
  7204. q[(rb) + 7] = d1_7 + (d2_7 << 7); \
  7205. q[(rb) + 8] = d1_0 - d2_0; \
  7206. q[(rb) + 9] = d1_1 - (d2_1 << 1); \
  7207. q[(rb) + 10] = d1_2 - (d2_2 << 2); \
  7208. q[(rb) + 11] = d1_3 - (d2_3 << 3); \
  7209. q[(rb) + 12] = d1_4 - (d2_4 << 4); \
  7210. q[(rb) + 13] = d1_5 - (d2_5 << 5); \
  7211. q[(rb) + 14] = d1_6 - (d2_6 << 6); \
  7212. q[(rb) + 15] = d1_7 - (d2_7 << 7); \
  7213. } while (0)
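/*
 * Worked example of the remark above: with alpha = 2 the line
 *   q[(rb) + 3] = d1_3 + (d2_3 << 3);
 * computes d1_3 + alpha^3 * d2_3, i.e. the usual FFT combination step with
 * the twiddle multiplication replaced by a left shift.
 */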
  7214.  
  7215. /*
  7216. * Output range: |q| <= 1183446
  7217. */
  7218. #define FFT32(xb, xs, rb, id) do { \
  7219. FFT16(xb, (xs) << 1, rb); \
  7220. FFT16((xb) + (xs), (xs) << 1, (rb) + 16); \
  7221. FFT_LOOP_16_8(rb); \
  7222. } while (0)
  7223.  
  7224. /*
  7225. * Output range: |q| <= 2366892
  7226. */
  7227. #define FFT64(xb, xs, rb) do { \
  7228. FFT32(xb, (xs) << 1, (rb), label_a); \
  7229. FFT32((xb) + (xs), (xs) << 1, (rb) + 32, label_b); \
  7230. FFT_LOOP_32_4(rb); \
  7231. } while (0)
  7232.  
  7233. /*
  7234. * Output range: |q| <= 9467568
  7235. */
  7236. #define FFT256(xb, xs, rb, id) do { \
  7237. FFT64((xb) + ((xs) * 0), (xs) << 2, (rb + 0)); \
  7238. FFT64((xb) + ((xs) * 2), (xs) << 2, (rb + 64)); \
  7239. FFT_LOOP_64_2(rb); \
  7240. FFT64((xb) + ((xs) * 1), (xs) << 2, (rb + 128)); \
  7241. FFT64((xb) + ((xs) * 3), (xs) << 2, (rb + 192)); \
  7242. FFT_LOOP_64_2((rb) + 128); \
  7243. FFT_LOOP_128_1(rb); \
  7244. } while (0)
  7245.  
  7246. /*
  7247. * beta^(255*i) mod 257
  7248. */
  7249. __constant const unsigned short yoff_b_n[] = {
  7250. 1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172,
  7251. 23, 151, 198, 149, 129, 210, 49, 20, 176, 161, 29, 101,
  7252. 15, 132, 185, 86, 140, 204, 99, 203, 193, 105, 153, 10,
  7253. 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
  7254. 225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150,
  7255. 35, 51, 89, 115, 241, 219, 231, 131, 22, 245, 100, 109,
  7256. 34, 145, 248, 75, 146, 154, 173, 186, 249, 238, 244, 194,
  7257. 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
  7258. 253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83,
  7259. 165, 167, 236, 175, 255, 188, 61, 177, 67, 127, 141, 110,
  7260. 197, 243, 31, 170, 211, 212, 118, 216, 256, 94, 159, 217,
  7261. 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
  7262. 128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171,
  7263. 117, 53, 158, 54, 64, 152, 104, 247, 169, 48, 114, 78,
  7264. 121, 191, 36, 214, 187, 155, 79, 27, 32, 76, 52, 252,
  7265. 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
  7266. 16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182,
  7267. 111, 103, 84, 71, 8, 19, 13, 63, 246, 6, 207, 74,
  7268. 240, 56, 133, 91, 184, 180, 42, 164, 4, 138, 135, 160,
  7269. 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
  7270. 2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87,
  7271. 46, 45, 139, 41
  7272. };
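/*
 * Illustrative host-side sketch (not part of the kernel): since the table
 * above holds beta^(255*i) mod 257, each entry is the previous one
 * multiplied by yoff_b_n[1] = 163 = beta^255 modulo 257 (consistent with
 * beta = 41: 41^-1 mod 257 = 163, and 163^2 mod 257 = 98 = yoff_b_n[2]).
 * It can therefore be regenerated as:
 *
 *   unsigned short tab[256];
 *   unsigned cur = 1;
 *   for (int i = 0; i < 256; i++) {
 *       tab[i] = (unsigned short)cur;   // tab[i] = beta^(255*i) mod 257
 *       cur = (cur * 163u) % 257u;      // multiply by beta^255 == 163
 *   }
 */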
  7273.  
  7274. #define INNER(l, h, mm) (((u32)((l) * (mm)) & 0xFFFFU) \
  7275. + ((u32)((h) * (mm)) << 16))
  7276.  
  7277. #define W_BIG(sb, o1, o2, mm) \
  7278. (INNER(q[16 * (sb) + 2 * 0 + o1], q[16 * (sb) + 2 * 0 + o2], mm), \
  7279. INNER(q[16 * (sb) + 2 * 1 + o1], q[16 * (sb) + 2 * 1 + o2], mm), \
  7280. INNER(q[16 * (sb) + 2 * 2 + o1], q[16 * (sb) + 2 * 2 + o2], mm), \
  7281. INNER(q[16 * (sb) + 2 * 3 + o1], q[16 * (sb) + 2 * 3 + o2], mm), \
  7282. INNER(q[16 * (sb) + 2 * 4 + o1], q[16 * (sb) + 2 * 4 + o2], mm), \
  7283. INNER(q[16 * (sb) + 2 * 5 + o1], q[16 * (sb) + 2 * 5 + o2], mm), \
  7284. INNER(q[16 * (sb) + 2 * 6 + o1], q[16 * (sb) + 2 * 6 + o2], mm), \
  7285. INNER(q[16 * (sb) + 2 * 7 + o1], q[16 * (sb) + 2 * 7 + o2], mm)
  7286.  
  7287. #define WB_0_0 W_BIG( 4, 0, 1, 185)
  7288. #define WB_0_1 W_BIG( 6, 0, 1, 185)
  7289. #define WB_0_2 W_BIG( 0, 0, 1, 185)
  7290. #define WB_0_3 W_BIG( 2, 0, 1, 185)
  7291. #define WB_0_4 W_BIG( 7, 0, 1, 185)
  7292. #define WB_0_5 W_BIG( 5, 0, 1, 185)
  7293. #define WB_0_6 W_BIG( 3, 0, 1, 185)
  7294. #define WB_0_7 W_BIG( 1, 0, 1, 185)
  7295. #define WB_1_0 W_BIG(15, 0, 1, 185)
  7296. #define WB_1_1 W_BIG(11, 0, 1, 185)
  7297. #define WB_1_2 W_BIG(12, 0, 1, 185)
  7298. #define WB_1_3 W_BIG( 8, 0, 1, 185)
  7299. #define WB_1_4 W_BIG( 9, 0, 1, 185)
  7300. #define WB_1_5 W_BIG(13, 0, 1, 185)
  7301. #define WB_1_6 W_BIG(10, 0, 1, 185)
  7302. #define WB_1_7 W_BIG(14, 0, 1, 185)
  7303. #define WB_2_0 W_BIG(17, -256, -128, 233)
  7304. #define WB_2_1 W_BIG(18, -256, -128, 233)
  7305. #define WB_2_2 W_BIG(23, -256, -128, 233)
  7306. #define WB_2_3 W_BIG(20, -256, -128, 233)
  7307. #define WB_2_4 W_BIG(22, -256, -128, 233)
  7308. #define WB_2_5 W_BIG(21, -256, -128, 233)
  7309. #define WB_2_6 W_BIG(16, -256, -128, 233)
  7310. #define WB_2_7 W_BIG(19, -256, -128, 233)
  7311. #define WB_3_0 W_BIG(30, -383, -255, 233)
  7312. #define WB_3_1 W_BIG(24, -383, -255, 233)
  7313. #define WB_3_2 W_BIG(25, -383, -255, 233)
  7314. #define WB_3_3 W_BIG(31, -383, -255, 233)
  7315. #define WB_3_4 W_BIG(27, -383, -255, 233)
  7316. #define WB_3_5 W_BIG(29, -383, -255, 233)
  7317. #define WB_3_6 W_BIG(28, -383, -255, 233)
  7318. #define WB_3_7 W_BIG(26, -383, -255, 233)
  7319.  
  7320. #define IF(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
  7321. #define MAJ(x, y, z) (((x) & (y)) | (((x) | (y)) & (z)))
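/* IF is a bitwise multiplexer (per bit: x ? y : z); MAJ is the bitwise
   majority of its three arguments. */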
  7322.  
  7323. #define PP4_0_0 1
  7324. #define PP4_0_1 0
  7325. #define PP4_0_2 3
  7326. #define PP4_0_3 2
  7327. #define PP4_1_0 2
  7328. #define PP4_1_1 3
  7329. #define PP4_1_2 0
  7330. #define PP4_1_3 1
  7331. #define PP4_2_0 3
  7332. #define PP4_2_1 2
  7333. #define PP4_2_2 1
  7334. #define PP4_2_3 0
  7335.  
  7336. #define PP8_0_0 1
  7337. #define PP8_0_1 0
  7338. #define PP8_0_2 3
  7339. #define PP8_0_3 2
  7340. #define PP8_0_4 5
  7341. #define PP8_0_5 4
  7342. #define PP8_0_6 7
  7343. #define PP8_0_7 6
  7344.  
  7345. #define PP8_1_0 6
  7346. #define PP8_1_1 7
  7347. #define PP8_1_2 4
  7348. #define PP8_1_3 5
  7349. #define PP8_1_4 2
  7350. #define PP8_1_5 3
  7351. #define PP8_1_6 0
  7352. #define PP8_1_7 1
  7353.  
  7354. #define PP8_2_0 2
  7355. #define PP8_2_1 3
  7356. #define PP8_2_2 0
  7357. #define PP8_2_3 1
  7358. #define PP8_2_4 6
  7359. #define PP8_2_5 7
  7360. #define PP8_2_6 4
  7361. #define PP8_2_7 5
  7362.  
  7363. #define PP8_3_0 3
  7364. #define PP8_3_1 2
  7365. #define PP8_3_2 1
  7366. #define PP8_3_3 0
  7367. #define PP8_3_4 7
  7368. #define PP8_3_5 6
  7369. #define PP8_3_6 5
  7370. #define PP8_3_7 4
  7371.  
  7372. #define PP8_4_0 5
  7373. #define PP8_4_1 4
  7374. #define PP8_4_2 7
  7375. #define PP8_4_3 6
  7376. #define PP8_4_4 1
  7377. #define PP8_4_5 0
  7378. #define PP8_4_6 3
  7379. #define PP8_4_7 2
  7380.  
  7381. #define PP8_5_0 7
  7382. #define PP8_5_1 6
  7383. #define PP8_5_2 5
  7384. #define PP8_5_3 4
  7385. #define PP8_5_4 3
  7386. #define PP8_5_5 2
  7387. #define PP8_5_6 1
  7388. #define PP8_5_7 0
  7389.  
  7390. #define PP8_6_0 4
  7391. #define PP8_6_1 5
  7392. #define PP8_6_2 6
  7393. #define PP8_6_3 7
  7394. #define PP8_6_4 0
  7395. #define PP8_6_5 1
  7396. #define PP8_6_6 2
  7397. #define PP8_6_7 3
  7398.  
  7399. #define STEP_ELT(n, w, fun, s, ppb) do { \
  7400. u32 tt = T32(D ## n + (w) + fun(A ## n, B ## n, C ## n)); \
  7401. A ## n = T32(ROL32(tt, s) + XCAT(tA, XCAT(ppb, n))); \
  7402. D ## n = C ## n; \
  7403. C ## n = B ## n; \
  7404. B ## n = tA ## n; \
  7405. } while (0)
  7406.  
  7407. #define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \
  7408. u32 tA0 = ROL32(A0, r); \
  7409. u32 tA1 = ROL32(A1, r); \
  7410. u32 tA2 = ROL32(A2, r); \
  7411. u32 tA3 = ROL32(A3, r); \
  7412. u32 tA4 = ROL32(A4, r); \
  7413. u32 tA5 = ROL32(A5, r); \
  7414. u32 tA6 = ROL32(A6, r); \
  7415. u32 tA7 = ROL32(A7, r); \
  7416. STEP_ELT(0, w0, fun, s, pp8b); \
  7417. STEP_ELT(1, w1, fun, s, pp8b); \
  7418. STEP_ELT(2, w2, fun, s, pp8b); \
  7419. STEP_ELT(3, w3, fun, s, pp8b); \
  7420. STEP_ELT(4, w4, fun, s, pp8b); \
  7421. STEP_ELT(5, w5, fun, s, pp8b); \
  7422. STEP_ELT(6, w6, fun, s, pp8b); \
  7423. STEP_ELT(7, w7, fun, s, pp8b); \
  7424. } while (0)
  7425.  
  7426. #define SIMD_M3_0_0 0_
  7427. #define SIMD_M3_1_0 1_
  7428. #define SIMD_M3_2_0 2_
  7429. #define SIMD_M3_3_0 0_
  7430. #define SIMD_M3_4_0 1_
  7431. #define SIMD_M3_5_0 2_
  7432. #define SIMD_M3_6_0 0_
  7433. #define SIMD_M3_7_0 1_
  7434.  
  7435. #define SIMD_M3_0_1 1_
  7436. #define SIMD_M3_1_1 2_
  7437. #define SIMD_M3_2_1 0_
  7438. #define SIMD_M3_3_1 1_
  7439. #define SIMD_M3_4_1 2_
  7440. #define SIMD_M3_5_1 0_
  7441. #define SIMD_M3_6_1 1_
  7442. #define SIMD_M3_7_1 2_
  7443.  
  7444. #define SIMD_M3_0_2 2_
  7445. #define SIMD_M3_1_2 0_
  7446. #define SIMD_M3_2_2 1_
  7447. #define SIMD_M3_3_2 2_
  7448. #define SIMD_M3_4_2 0_
  7449. #define SIMD_M3_5_2 1_
  7450. #define SIMD_M3_6_2 2_
  7451. #define SIMD_M3_7_2 0_
  7452.  
  7453. #define M7_0_0 0_
  7454. #define M7_1_0 1_
  7455. #define M7_2_0 2_
  7456. #define M7_3_0 3_
  7457. #define M7_4_0 4_
  7458. #define M7_5_0 5_
  7459. #define M7_6_0 6_
  7460. #define M7_7_0 0_
  7461.  
  7462. #define M7_0_1 1_
  7463. #define M7_1_1 2_
  7464. #define M7_2_1 3_
  7465. #define M7_3_1 4_
  7466. #define M7_4_1 5_
  7467. #define M7_5_1 6_
  7468. #define M7_6_1 0_
  7469. #define M7_7_1 1_
  7470.  
  7471. #define M7_0_2 2_
  7472. #define M7_1_2 3_
  7473. #define M7_2_2 4_
  7474. #define M7_3_2 5_
  7475. #define M7_4_2 6_
  7476. #define M7_5_2 0_
  7477. #define M7_6_2 1_
  7478. #define M7_7_2 2_
  7479.  
  7480. #define M7_0_3 3_
  7481. #define M7_1_3 4_
  7482. #define M7_2_3 5_
  7483. #define M7_3_3 6_
  7484. #define M7_4_3 0_
  7485. #define M7_5_3 1_
  7486. #define M7_6_3 2_
  7487. #define M7_7_3 3_
  7488.  
  7489. #define STEP_BIG_(w, fun, r, s, pp8b) STEP_BIG w, fun, r, s, pp8b)
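/*
 * Note: the "missing" opening parenthesis here is intentional. Each WB_*
 * macro expands (via W_BIG above) to "(INNER(...), ..., INNER(...)" -- an
 * argument list that begins with an unmatched '(' -- so STEP_BIG_ only has
 * to append the macro name and the closing ')', yielding a complete
 * STEP_BIG(w0, ..., w7, fun, r, s, pp8b) invocation after rescanning.
 */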
  7490.  
  7491. #define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3) do { \
  7492. STEP_BIG_(WB_ ## ri ## 0, \
  7493. IF, p0, p1, XCAT(PP8_, M7_0_ ## isp)); \
  7494. STEP_BIG_(WB_ ## ri ## 1, \
  7495. IF, p1, p2, XCAT(PP8_, M7_1_ ## isp)); \
  7496. STEP_BIG_(WB_ ## ri ## 2, \
  7497. IF, p2, p3, XCAT(PP8_, M7_2_ ## isp)); \
  7498. STEP_BIG_(WB_ ## ri ## 3, \
  7499. IF, p3, p0, XCAT(PP8_, M7_3_ ## isp)); \
  7500. STEP_BIG_(WB_ ## ri ## 4, \
  7501. MAJ, p0, p1, XCAT(PP8_, M7_4_ ## isp)); \
  7502. STEP_BIG_(WB_ ## ri ## 5, \
  7503. MAJ, p1, p2, XCAT(PP8_, M7_5_ ## isp)); \
  7504. STEP_BIG_(WB_ ## ri ## 6, \
  7505. MAJ, p2, p3, XCAT(PP8_, M7_6_ ## isp)); \
  7506. STEP_BIG_(WB_ ## ri ## 7, \
  7507. MAJ, p3, p0, XCAT(PP8_, M7_7_ ## isp)); \
  7508. } while (0)
  7509.  
  7510. __constant const u32 SIMD_IV512[] = {
  7511. C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC),
  7512. C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558),
  7513. C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F),
  7514. C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E),
  7515. C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8),
  7516. C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257),
  7517. C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4),
  7518. C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22)
  7519. };
  7520.  
  7521. /* $Id: echo.c 227 2010-06-16 17:28:38Z tp $ */
  7522. /*
  7523. * ECHO implementation.
  7524. *
  7525. * ==========================(LICENSE BEGIN)============================
  7526. *
  7527. * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  7528. *
  7529. * Permission is hereby granted, free of charge, to any person obtaining
  7530. * a copy of this software and associated documentation files (the
  7531. * "Software"), to deal in the Software without restriction, including
  7532. * without limitation the rights to use, copy, modify, merge, publish,
  7533. * distribute, sublicense, and/or sell copies of the Software, and to
  7534. * permit persons to whom the Software is furnished to do so, subject to
  7535. * the following conditions:
  7536. *
  7537. * The above copyright notice and this permission notice shall be
  7538. * included in all copies or substantial portions of the Software.
  7539. *
  7540. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  7541. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  7542. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  7543. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  7544. * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  7545. * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  7546. * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  7547. *
  7548. * ===========================(LICENSE END)=============================
  7549. *
  7550. * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  7551. */
  7552.  
  7553. #define T32 SPH_T32
  7554. #define C32 SPH_C32
  7555. #if SPH_64
  7556. #define C64 SPH_C64
  7557. #endif
  7558.  
  7559.  
  7560. #define ECHO_DECL_STATE_BIG \
  7561. sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1;
  7562.  
  7563. #define AES_2ROUNDS(XX, XY) do { \
  7564. sph_u32 X0 = (sph_u32)(XX); \
  7565. sph_u32 X1 = (sph_u32)(XX >> 32); \
  7566. sph_u32 X2 = (sph_u32)(XY); \
  7567. sph_u32 X3 = (sph_u32)(XY >> 32); \
  7568. sph_u32 Y0, Y1, Y2, Y3; \
  7569. AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3); \
  7570. AES_ROUND_NOKEY_LE(Y0, Y1, Y2, Y3, X0, X1, X2, X3); \
  7571. XX = (sph_u64)X0 | ((sph_u64)X1 << 32); \
  7572. XY = (sph_u64)X2 | ((sph_u64)X3 << 32); \
  7573. if ((K0 = T32(K0 + 1)) == 0) { \
  7574. if ((K1 = T32(K1 + 1)) == 0) \
  7575. if ((K2 = T32(K2 + 1)) == 0) \
  7576. K3 = T32(K3 + 1); \
  7577. } \
  7578. } while (0)
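/*
 * AES_2ROUNDS applies two AES rounds to the 128-bit word (XY:XX): the first
 * keyed with the running counter K0..K3, the second with no key. The
 * trailing ifs then increment K3:K2:K1:K0 as a single 128-bit counter
 * (propagating the carry), as used by BIG_SUB_WORDS below.
 */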
  7579.  
  7580. #define BIG_SUB_WORDS do { \
  7581. AES_2ROUNDS(W00, W01); \
  7582. AES_2ROUNDS(W10, W11); \
  7583. AES_2ROUNDS(W20, W21); \
  7584. AES_2ROUNDS(W30, W31); \
  7585. AES_2ROUNDS(W40, W41); \
  7586. AES_2ROUNDS(W50, W51); \
  7587. AES_2ROUNDS(W60, W61); \
  7588. AES_2ROUNDS(W70, W71); \
  7589. AES_2ROUNDS(W80, W81); \
  7590. AES_2ROUNDS(W90, W91); \
  7591. AES_2ROUNDS(WA0, WA1); \
  7592. AES_2ROUNDS(WB0, WB1); \
  7593. AES_2ROUNDS(WC0, WC1); \
  7594. AES_2ROUNDS(WD0, WD1); \
  7595. AES_2ROUNDS(WE0, WE1); \
  7596. AES_2ROUNDS(WF0, WF1); \
  7597. } while (0)
  7598.  
  7599. #define SHIFT_ROW1(a, b, c, d) do { \
  7600. sph_u64 tmp; \
  7601. tmp = W ## a ## 0; \
  7602. W ## a ## 0 = W ## b ## 0; \
  7603. W ## b ## 0 = W ## c ## 0; \
  7604. W ## c ## 0 = W ## d ## 0; \
  7605. W ## d ## 0 = tmp; \
  7606. tmp = W ## a ## 1; \
  7607. W ## a ## 1 = W ## b ## 1; \
  7608. W ## b ## 1 = W ## c ## 1; \
  7609. W ## c ## 1 = W ## d ## 1; \
  7610. W ## d ## 1 = tmp; \
  7611. } while (0)
  7612.  
  7613. #define SHIFT_ROW2(a, b, c, d) do { \
  7614. sph_u64 tmp; \
  7615. tmp = W ## a ## 0; \
  7616. W ## a ## 0 = W ## c ## 0; \
  7617. W ## c ## 0 = tmp; \
  7618. tmp = W ## b ## 0; \
  7619. W ## b ## 0 = W ## d ## 0; \
  7620. W ## d ## 0 = tmp; \
  7621. tmp = W ## a ## 1; \
  7622. W ## a ## 1 = W ## c ## 1; \
  7623. W ## c ## 1 = tmp; \
  7624. tmp = W ## b ## 1; \
  7625. W ## b ## 1 = W ## d ## 1; \
  7626. W ## d ## 1 = tmp; \
  7627. } while (0)
  7628.  
  7629. #define SHIFT_ROW3(a, b, c, d) SHIFT_ROW1(d, c, b, a)
  7630.  
  7631. #define BIG_SHIFT_ROWS do { \
  7632. SHIFT_ROW1(1, 5, 9, D); \
  7633. SHIFT_ROW2(2, 6, A, E); \
  7634. SHIFT_ROW3(3, 7, B, F); \
  7635. } while (0)
  7636.  
  7637. #define MIX_COLUMN1(ia, ib, ic, id, n) do { \
  7638. sph_u64 a = W ## ia ## n; \
  7639. sph_u64 b = W ## ib ## n; \
  7640. sph_u64 c = W ## ic ## n; \
  7641. sph_u64 d = W ## id ## n; \
  7642. sph_u64 ab = a ^ b; \
  7643. sph_u64 bc = b ^ c; \
  7644. sph_u64 cd = c ^ d; \
  7645. sph_u64 abx = ((ab & C64(0x8080808080808080)) >> 7) * 27U \
  7646. ^ ((ab & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
  7647. sph_u64 bcx = ((bc & C64(0x8080808080808080)) >> 7) * 27U \
  7648. ^ ((bc & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
  7649. sph_u64 cdx = ((cd & C64(0x8080808080808080)) >> 7) * 27U \
  7650. ^ ((cd & C64(0x7F7F7F7F7F7F7F7F)) << 1); \
  7651. W ## ia ## n = abx ^ bc ^ d; \
  7652. W ## ib ## n = bcx ^ a ^ cd; \
  7653. W ## ic ## n = cdx ^ ab ^ d; \
  7654. W ## id ## n = abx ^ bcx ^ cdx ^ ab ^ c; \
  7655. } while (0)
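/*
 * The mask/multiply expressions above double eight packed GF(2^8) bytes at
 * once: for a single byte b, xtime(b) = (b << 1) ^ (0x1B if b's top bit was
 * set), where 27 == 0x1B is the AES reduction polynomial. Illustrative
 * one-byte helper:
 *
 *   unsigned char xtime(unsigned char b) {
 *       return (unsigned char)((b << 1) ^ ((b >> 7) * 0x1B));
 *   }
 *
 * The 64-bit form applies this to every byte lane of ab, bc and cd in
 * parallel.
 */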
  7656.  
  7657. #define MIX_COLUMN(a, b, c, d) do { \
  7658. MIX_COLUMN1(a, b, c, d, 0); \
  7659. MIX_COLUMN1(a, b, c, d, 1); \
  7660. } while (0)
  7661.  
  7662. #define BIG_MIX_COLUMNS do { \
  7663. MIX_COLUMN(0, 1, 2, 3); \
  7664. MIX_COLUMN(4, 5, 6, 7); \
  7665. MIX_COLUMN(8, 9, A, B); \
  7666. MIX_COLUMN(C, D, E, F); \
  7667. } while (0)
  7668.  
  7669. #define BIG_ROUND do { \
  7670. BIG_SUB_WORDS; \
  7671. BIG_SHIFT_ROWS; \
  7672. BIG_MIX_COLUMNS; \
  7673. } while (0)
  7674.  
  7675. #define ECHO_COMPRESS_BIG(sc) do { \
  7676. sph_u32 K0 = sc->C0; \
  7677. sph_u32 K1 = sc->C1; \
  7678. sph_u32 K2 = sc->C2; \
  7679. sph_u32 K3 = sc->C3; \
  7680. unsigned u; \
  7681. INPUT_BLOCK_BIG(sc); \
  7682. for (u = 0; u < 10; u ++) { \
  7683. BIG_ROUND; \
  7684. } \
  7685. ECHO_FINAL_BIG; \
  7686. } while (0)
  7687.  
  7688.  
  7689. #define SWAP4(x) as_uint(as_uchar4(x).wzyx)
  7690. #define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
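/* SWAP4 reverses the four bytes of a 32-bit word, SWAP8 the eight bytes of
   a 64-bit word (endianness conversion). */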
  7691.  
  7692. #if SPH_BIG_ENDIAN
  7693. #define DEC64E(x) (x)
  7694. #define DEC64BE(x) (*(const __global sph_u64 *) (x))
  7695. #else
  7696. #define DEC64E(x) SWAP8(x)
  7697. #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x))
  7698. #endif
  7699.  
  7700. typedef union {
  7701. unsigned char h1[64];
  7702. uint h4[16];
  7703. ulong h8[8];
  7704. } hash_t;
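/* hash_t overlays the same 64-byte per-work-item hash as bytes (h1), 32-bit
   words (h4) and 64-bit words (h8); each kernel below uses whichever view
   matches its word size. */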
  7705.  
  7706. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7707. __kernel void blake(__global unsigned char* block, volatile __global hash_t* hashes)
  7708. {
  7709. uint gid = get_global_id(0);
  7710. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7711.  
  7712. // blake
  7713. {
  7714. sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B);
  7715. sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1);
  7716. sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F);
  7717. sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179);
  7718. sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0;
  7719. sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;
  7720.  
  7721. if ((T0 = SPH_T64(T0 + 1024)) < 1024)
  7722. {
  7723. T1 = SPH_T64(T1 + 1);
  7724. }
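// After this adjustment T0 = 640 (= 80 bytes * 8), the bit length of the
// block header absorbed by this single padded BLAKE-512 block, and T1
// wraps from ~0UL back to 0.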
  7725. sph_u64 M0, M1, M2, M3, M4, M5, M6, M7;
  7726. sph_u64 M8, M9, MA, MB, MC, MD, ME, MF;
  7727. sph_u64 V0, V1, V2, V3, V4, V5, V6, V7;
  7728. sph_u64 V8, V9, VA, VB, VC, VD, VE, VF;
  7729. M0 = DEC64BE(block + 0);
  7730. M1 = DEC64BE(block + 8);
  7731. M2 = DEC64BE(block + 16);
  7732. M3 = DEC64BE(block + 24);
  7733. M4 = DEC64BE(block + 32);
  7734. M5 = DEC64BE(block + 40);
  7735. M6 = DEC64BE(block + 48);
  7736. M7 = DEC64BE(block + 56);
  7737. M8 = DEC64BE(block + 64);
  7738. M9 = DEC64BE(block + 72);
  7739. M9 &= 0xFFFFFFFF00000000;
  7740. M9 ^= SWAP4(gid);
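// Per-work-item nonce: gid replaces the low 32 bits of M9 (header bytes
// 76..79), byte-swapped into the order the block header expects.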
  7741. MA = 0x8000000000000000;
  7742. MB = 0;
  7743. MC = 0;
  7744. MD = 1;
  7745. ME = 0;
  7746. MF = 0x280;
  7747.  
  7748. COMPRESS64;
  7749.  
  7750. hash->h8[0] = H0;
  7751. hash->h8[1] = H1;
  7752. hash->h8[2] = H2;
  7753. hash->h8[3] = H3;
  7754. hash->h8[4] = H4;
  7755. hash->h8[5] = H5;
  7756. hash->h8[6] = H6;
  7757. hash->h8[7] = H7;
  7758. }
  7759. barrier(CLK_GLOBAL_MEM_FENCE);
  7760. }
  7761.  
  7762. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7763. __kernel void bmw(volatile __global hash_t* hashes)
  7764. {
  7765. uint gid = get_global_id(0);
  7766. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7767.  
  7768. // bmw
  7769. sph_u64 BMW_H[16];
  7770. for(unsigned u = 0; u < 16; u++)
  7771. BMW_H[u] = BMW_IV512[u];
  7772.  
  7773. sph_u64 BMW_h1[16], BMW_h2[16];
  7774. sph_u64 mv[16];
  7775.  
  7776. mv[ 0] = SWAP8(hash->h8[0]);
  7777. mv[ 1] = SWAP8(hash->h8[1]);
  7778. mv[ 2] = SWAP8(hash->h8[2]);
  7779. mv[ 3] = SWAP8(hash->h8[3]);
  7780. mv[ 4] = SWAP8(hash->h8[4]);
  7781. mv[ 5] = SWAP8(hash->h8[5]);
  7782. mv[ 6] = SWAP8(hash->h8[6]);
  7783. mv[ 7] = SWAP8(hash->h8[7]);
  7784. mv[ 8] = 0x80;
  7785. mv[ 9] = 0;
  7786. mv[10] = 0;
  7787. mv[11] = 0;
  7788. mv[12] = 0;
  7789. mv[13] = 0;
  7790. mv[14] = 0;
  7791. mv[15] = 0x200;
  7792. #define M(x) (mv[x])
  7793. #define H(x) (BMW_H[x])
  7794. #define dH(x) (BMW_h2[x])
  7795.  
  7796. FOLDb;
  7797.  
  7798. #undef M
  7799. #undef H
  7800. #undef dH
  7801.  
  7802. #define M(x) (BMW_h2[x])
  7803. #define H(x) (final_b[x])
  7804. #define dH(x) (BMW_h1[x])
  7805.  
  7806. FOLDb;
  7807.  
  7808. #undef M
  7809. #undef H
  7810. #undef dH
  7811.  
  7812. hash->h8[0] = SWAP8(BMW_h1[8]);
  7813. hash->h8[1] = SWAP8(BMW_h1[9]);
  7814. hash->h8[2] = SWAP8(BMW_h1[10]);
  7815. hash->h8[3] = SWAP8(BMW_h1[11]);
  7816. hash->h8[4] = SWAP8(BMW_h1[12]);
  7817. hash->h8[5] = SWAP8(BMW_h1[13]);
  7818. hash->h8[6] = SWAP8(BMW_h1[14]);
  7819. hash->h8[7] = SWAP8(BMW_h1[15]);
  7820. barrier(CLK_GLOBAL_MEM_FENCE);
  7821.  
  7822. }
  7823. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7824. __kernel void groestl(volatile __global hash_t* hashes)
  7825. {
  7826. uint gid = get_global_id(0);
  7827. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7828.  
  7829. #if !SPH_SMALL_FOOTPRINT_GROESTL
  7830. __local sph_u64 T0[256], T1[256], T2[256], T3[256];
  7831. __local sph_u64 T4[256], T5[256], T6[256], T7[256];
  7832. #else
  7833. __local sph_u64 T0[256], T4[256];
  7834. #endif
  7835. int init = get_local_id(0);
  7836. int step = get_local_size(0);
  7837. for (int i = init; i < 256; i += step)
  7838. {
  7839. T0[i] = T0_C[i];
  7840. T4[i] = T4_C[i];
  7841. #if !SPH_SMALL_FOOTPRINT_GROESTL
  7842. T1[i] = T1_C[i];
  7843. T2[i] = T2_C[i];
  7844. T3[i] = T3_C[i];
  7845. T5[i] = T5_C[i];
  7846. T6[i] = T6_C[i];
  7847. T7[i] = T7_C[i];
  7848. #endif
  7849. }
  7850. barrier(CLK_LOCAL_MEM_FENCE); // groestl
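// Each work-item copies a strided slice of the Groestl T-tables from
// __constant into __local memory; the barrier makes the shared tables
// visible to the whole work-group before the permutations below use them.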
  7851.  
  7852. sph_u64 H[16];
  7853. for (unsigned int u = 0; u < 15; u ++)
  7854. H[u] = 0;
  7855. #if USE_LE
  7856. H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
  7857. #else
  7858. H[15] = (sph_u64)512;
  7859. #endif
  7860.  
  7861. sph_u64 g[16], m[16];
  7862. m[0] = DEC64E(hash->h8[0]);
  7863. m[1] = DEC64E(hash->h8[1]);
  7864. m[2] = DEC64E(hash->h8[2]);
  7865. m[3] = DEC64E(hash->h8[3]);
  7866. m[4] = DEC64E(hash->h8[4]);
  7867. m[5] = DEC64E(hash->h8[5]);
  7868. m[6] = DEC64E(hash->h8[6]);
  7869. m[7] = DEC64E(hash->h8[7]);
  7870. for (unsigned int u = 0; u < 8; u ++) // m[8..15] and g[8..15] are assigned explicitly below
  7871. g[u] = m[u] ^ H[u];
  7872. m[8] = 0x80; g[8] = m[8] ^ H[8];
  7873. m[9] = 0; g[9] = m[9] ^ H[9];
  7874. m[10] = 0; g[10] = m[10] ^ H[10];
  7875. m[11] = 0; g[11] = m[11] ^ H[11];
  7876. m[12] = 0; g[12] = m[12] ^ H[12];
  7877. m[13] = 0; g[13] = m[13] ^ H[13];
  7878. m[14] = 0; g[14] = m[14] ^ H[14];
  7879. m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
  7880. PERM_BIG_P(g);
  7881. PERM_BIG_Q(m);
  7882. for (unsigned int u = 0; u < 16; u ++)
  7883. H[u] ^= g[u] ^ m[u];
  7884. sph_u64 xH[16];
  7885. for (unsigned int u = 0; u < 16; u ++)
  7886. xH[u] = H[u];
  7887. PERM_BIG_P(xH);
  7888. for (unsigned int u = 0; u < 16; u ++)
  7889. H[u] ^= xH[u];
  7890. for (unsigned int u = 0; u < 8; u ++)
  7891. hash->h8[u] = DEC64E(H[u + 8]);
  7892. barrier(CLK_GLOBAL_MEM_FENCE);
  7893.  
  7894. }
  7895. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7896. __kernel void skein(volatile __global hash_t* hashes)
  7897. {
  7898. uint gid = get_global_id(0);
  7899. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7900.  
  7901. // skein
  7902.  
  7903. sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33);
  7904. sph_u64 m0, m1, m2, m3, m4, m5, m6, m7;
  7905. sph_u64 bcount = 0;
  7906.  
  7907. m0 = SWAP8(hash->h8[0]);
  7908. m1 = SWAP8(hash->h8[1]);
  7909. m2 = SWAP8(hash->h8[2]);
  7910. m3 = SWAP8(hash->h8[3]);
  7911. m4 = SWAP8(hash->h8[4]);
  7912. m5 = SWAP8(hash->h8[5]);
  7913. m6 = SWAP8(hash->h8[6]);
  7914. m7 = SWAP8(hash->h8[7]);
  7915. UBI_BIG(480, 64);
  7916. bcount = 0;
  7917. m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0;
  7918. UBI_BIG(510, 8);
  7919. hash->h8[0] = SWAP8(h0);
  7920. hash->h8[1] = SWAP8(h1);
  7921. hash->h8[2] = SWAP8(h2);
  7922. hash->h8[3] = SWAP8(h3);
  7923. hash->h8[4] = SWAP8(h4);
  7924. hash->h8[5] = SWAP8(h5);
  7925. hash->h8[6] = SWAP8(h6);
  7926. hash->h8[7] = SWAP8(h7);
  7927.  
  7928. barrier(CLK_GLOBAL_MEM_FENCE);
  7929. }
  7930. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7931. __kernel void jh(volatile __global hash_t* hashes)
  7932. {
  7933. uint gid = get_global_id(0);
  7934. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7935.  
  7936. // jh
  7937.  
  7938. sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7);
  7939. sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b);
  7940. sph_u64 tmp;
  7941.  
  7942. for(int i = 0; i < 2; i++)
  7943. {
  7944. if (i == 0) {
  7945. h0h ^= DEC64E(hash->h8[0]);
  7946. h0l ^= DEC64E(hash->h8[1]);
  7947. h1h ^= DEC64E(hash->h8[2]);
  7948. h1l ^= DEC64E(hash->h8[3]);
  7949. h2h ^= DEC64E(hash->h8[4]);
  7950. h2l ^= DEC64E(hash->h8[5]);
  7951. h3h ^= DEC64E(hash->h8[6]);
  7952. h3l ^= DEC64E(hash->h8[7]);
  7953. } else if(i == 1) {
  7954. h4h ^= DEC64E(hash->h8[0]);
  7955. h4l ^= DEC64E(hash->h8[1]);
  7956. h5h ^= DEC64E(hash->h8[2]);
  7957. h5l ^= DEC64E(hash->h8[3]);
  7958. h6h ^= DEC64E(hash->h8[4]);
  7959. h6l ^= DEC64E(hash->h8[5]);
  7960. h7h ^= DEC64E(hash->h8[6]);
  7961. h7l ^= DEC64E(hash->h8[7]);
  7962.  
  7963. h0h ^= 0x80;
  7964. h3l ^= 0x2000000000000;
  7965. }
  7966. E8;
  7967. }
  7968. h4h ^= 0x80;
  7969. h7l ^= 0x2000000000000;
  7970.  
  7971. hash->h8[0] = DEC64E(h4h);
  7972. hash->h8[1] = DEC64E(h4l);
  7973. hash->h8[2] = DEC64E(h5h);
  7974. hash->h8[3] = DEC64E(h5l);
  7975. hash->h8[4] = DEC64E(h6h);
  7976. hash->h8[5] = DEC64E(h6l);
  7977. hash->h8[6] = DEC64E(h7h);
  7978. hash->h8[7] = DEC64E(h7l);
  7979.  
  7980. barrier(CLK_GLOBAL_MEM_FENCE);
  7981. }
  7982. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  7983. __kernel void keccak(volatile __global hash_t* hashes)
  7984. {
  7985. uint gid = get_global_id(0);
  7986. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  7987.  
  7988. // keccak
  7989.  
  7990. sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0;
  7991. sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0;
  7992. sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0;
  7993. sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0;
  7994. sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0;
  7995.  
  7996. a10 = SPH_C64(0xFFFFFFFFFFFFFFFF);
  7997. a20 = SPH_C64(0xFFFFFFFFFFFFFFFF);
  7998. a31 = SPH_C64(0xFFFFFFFFFFFFFFFF);
  7999. a22 = SPH_C64(0xFFFFFFFFFFFFFFFF);
  8000. a23 = SPH_C64(0xFFFFFFFFFFFFFFFF);
  8001. a04 = SPH_C64(0xFFFFFFFFFFFFFFFF);
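// Keccak "lane complement" optimization: these lanes start out inverted so
// the chi step needs fewer NOT operations; the lanes still complemented on
// output are inverted back after KECCAK_F_1600 below.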
  8002.  
  8003. a00 ^= SWAP8(hash->h8[0]);
  8004. a10 ^= SWAP8(hash->h8[1]);
  8005. a20 ^= SWAP8(hash->h8[2]);
  8006. a30 ^= SWAP8(hash->h8[3]);
  8007. a40 ^= SWAP8(hash->h8[4]);
  8008. a01 ^= SWAP8(hash->h8[5]);
  8009. a11 ^= SWAP8(hash->h8[6]);
  8010. a21 ^= SWAP8(hash->h8[7]);
  8011. a31 ^= 0x8000000000000001;
  8012. KECCAK_F_1600;
  8013. // Finalize the "lane complement"
  8014. a10 = ~a10;
  8015. a20 = ~a20;
  8016.  
  8017. hash->h8[0] = SWAP8(a00);
  8018. hash->h8[1] = SWAP8(a10);
  8019. hash->h8[2] = SWAP8(a20);
  8020. hash->h8[3] = SWAP8(a30);
  8021. hash->h8[4] = SWAP8(a40);
  8022. hash->h8[5] = SWAP8(a01);
  8023. hash->h8[6] = SWAP8(a11);
  8024. hash->h8[7] = SWAP8(a21);
  8025.  
  8026. barrier(CLK_GLOBAL_MEM_FENCE);
  8027. }
  8028. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  8029. __kernel void luffa(volatile __global hash_t* hashes)
  8030. {
  8031. uint gid = get_global_id(0);
  8032. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  8033.  
  8034. // luffa
  8035.  
  8036. sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb);
  8037. sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581);
  8038. sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7);
  8039. sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce);
  8040. sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea);
  8041.  
  8042. DECL_TMP8(M);
  8043.  
  8044. M0 = hash->h4[1];
  8045. M1 = hash->h4[0];
  8046. M2 = hash->h4[3];
  8047. M3 = hash->h4[2];
  8048. M4 = hash->h4[5];
  8049. M5 = hash->h4[4];
  8050. M6 = hash->h4[7];
  8051. M7 = hash->h4[6];
  8052.  
  8053. for(uint i = 0; i < 5; i++)
  8054. {
  8055. MI5;
  8056. LUFFA_P5;
  8057.  
  8058. if(i == 0) {
  8059. M0 = hash->h4[9];
  8060. M1 = hash->h4[8];
  8061. M2 = hash->h4[11];
  8062. M3 = hash->h4[10];
  8063. M4 = hash->h4[13];
  8064. M5 = hash->h4[12];
  8065. M6 = hash->h4[15];
  8066. M7 = hash->h4[14];
  8067. } else if(i == 1) {
  8068. M0 = 0x80000000;
  8069. M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0;
  8070. } else if(i == 2) {
  8071. M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0;
  8072. } else if(i == 3) {
  8073. hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40;
  8074. hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41;
  8075. hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42;
  8076. hash->h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43;
  8077. hash->h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44;
  8078. hash->h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45;
  8079. hash->h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46;
  8080. hash->h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47;
  8081. }
  8082. }
  8083. hash->h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40;
  8084. hash->h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41;
  8085. hash->h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42;
  8086. hash->h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43;
  8087. hash->h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44;
  8088. hash->h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45;
  8089. hash->h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46;
  8090. hash->h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47;
  8091.  
  8092. barrier(CLK_GLOBAL_MEM_FENCE);
  8093. }
  8094. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  8095. __kernel void cubehash(volatile __global hash_t* hashes)
  8096. {
  8097. uint gid = get_global_id(0);
  8098. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  8099.  
  8100. // cubehash.h1
  8101.  
  8102. sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E);
  8103. sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695);
  8104. sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537);
  8105. sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE);
  8106. sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532);
  8107. sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9);
  8108. sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576);
  8109. sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44);
  8110.  
  8111. x0 ^= SWAP4(hash->h4[1]);
  8112. x1 ^= SWAP4(hash->h4[0]);
  8113. x2 ^= SWAP4(hash->h4[3]);
  8114. x3 ^= SWAP4(hash->h4[2]);
  8115. x4 ^= SWAP4(hash->h4[5]);
  8116. x5 ^= SWAP4(hash->h4[4]);
  8117. x6 ^= SWAP4(hash->h4[7]);
  8118. x7 ^= SWAP4(hash->h4[6]);
  8119.  
  8120. for (int i = 0; i < 13; i ++) {
  8121. SIXTEEN_ROUNDS;
  8122.  
  8123. if (i == 0) {
  8124. x0 ^= SWAP4(hash->h4[9]);
  8125. x1 ^= SWAP4(hash->h4[8]);
  8126. x2 ^= SWAP4(hash->h4[11]);
  8127. x3 ^= SWAP4(hash->h4[10]);
  8128. x4 ^= SWAP4(hash->h4[13]);
  8129. x5 ^= SWAP4(hash->h4[12]);
  8130. x6 ^= SWAP4(hash->h4[15]);
  8131. x7 ^= SWAP4(hash->h4[14]);
  8132. } else if(i == 1) {
  8133. x0 ^= 0x80;
  8134. } else if (i == 2) {
  8135. xv ^= SPH_C32(1);
  8136. }
  8137. }
  8138.  
  8139. hash->h4[0] = x0;
  8140. hash->h4[1] = x1;
  8141. hash->h4[2] = x2;
  8142. hash->h4[3] = x3;
  8143. hash->h4[4] = x4;
  8144. hash->h4[5] = x5;
  8145. hash->h4[6] = x6;
  8146. hash->h4[7] = x7;
  8147. hash->h4[8] = x8;
  8148. hash->h4[9] = x9;
  8149. hash->h4[10] = xa;
  8150. hash->h4[11] = xb;
  8151. hash->h4[12] = xc;
  8152. hash->h4[13] = xd;
  8153. hash->h4[14] = xe;
  8154. hash->h4[15] = xf;
  8155.  
  8156. barrier(CLK_GLOBAL_MEM_FENCE);
  8157. }
  8158. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  8159. __kernel void shavite(volatile __global hash_t* hashes)
  8160. {
  8161. uint gid = get_global_id(0);
  8162. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  8163. __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
  8164. int init = get_local_id(0);
  8165. int step = get_local_size(0);
  8166. for (int i = init; i < 256; i += step)
  8167. {
  8168. AES0[i] = AES0_C[i];
  8169. AES1[i] = AES1_C[i];
  8170. AES2[i] = AES2_C[i];
  8171. AES3[i] = AES3_C[i];
  8172. }
  8173. barrier(CLK_LOCAL_MEM_FENCE);
  8174.  
  8175. // shavite
  8176. {
  8177. // IV
  8178. sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC);
  8179. sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC);
  8180. sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47);
  8181. sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A);
  8182.  
  8183. // state
  8184. sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07;
  8185. sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F;
  8186. sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17;
  8187. sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F;
  8188.  
  8189. sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0;
  8190.  
  8191. rk00 = hash->h4[0];
  8192. rk01 = hash->h4[1];
  8193. rk02 = hash->h4[2];
  8194. rk03 = hash->h4[3];
  8195. rk04 = hash->h4[4];
  8196. rk05 = hash->h4[5];
  8197. rk06 = hash->h4[6];
  8198. rk07 = hash->h4[7];
  8199. rk08 = hash->h4[8];
  8200. rk09 = hash->h4[9];
  8201. rk0A = hash->h4[10];
  8202. rk0B = hash->h4[11];
  8203. rk0C = hash->h4[12];
  8204. rk0D = hash->h4[13];
  8205. rk0E = hash->h4[14];
  8206. rk0F = hash->h4[15];
  8207. rk10 = 0x80;
  8208. rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0;
  8209. rk1B = 0x2000000;
  8210. rk1C = rk1D = rk1E = 0;
  8211. rk1F = 0x2000000;
  8212.  
  8213. c512(buf);
  8214.  
  8215. hash->h4[0] = h0;
  8216. hash->h4[1] = h1;
  8217. hash->h4[2] = h2;
  8218. hash->h4[3] = h3;
  8219. hash->h4[4] = h4;
  8220. hash->h4[5] = h5;
  8221. hash->h4[6] = h6;
  8222. hash->h4[7] = h7;
  8223. hash->h4[8] = h8;
  8224. hash->h4[9] = h9;
  8225. hash->h4[10] = hA;
  8226. hash->h4[11] = hB;
  8227. hash->h4[12] = hC;
  8228. hash->h4[13] = hD;
  8229. hash->h4[14] = hE;
  8230. hash->h4[15] = hF;
  8231. }
  8232.  
  8233. barrier(CLK_GLOBAL_MEM_FENCE);
  8234. }
  8235. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  8236. __kernel void simd(volatile __global hash_t* hashes)
  8237. {
  8238. uint gid = get_global_id(0);
  8239. __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
  8240.  
  8241. // simd
  8242. s32 q[256];
  8243. unsigned char x[128];
  8244. for(unsigned int i = 0; i < 64; i++)
  8245. x[i] = hash->h1[i];
  8246. for(unsigned int i = 64; i < 128; i++)
  8247. x[i] = 0;
  8248.  
  8249. u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558);
  8250. u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E);
  8251. u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257);
  8252. u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22);
  8253.  
  8254. FFT256(0, 1, 0, ll1);
  8255. for (int i = 0; i < 256; i ++) {
  8256. s32 tq;
  8257.  
  8258. tq = q[i] + yoff_b_n[i];
  8259. tq = REDS2(tq);
  8260. tq = REDS1(tq);
  8261. tq = REDS1(tq);
  8262. q[i] = (tq <= 128 ? tq : tq - 257);
  8263. }
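// REDS2 and the two REDS1 steps partially reduce q[i] + yoff_b_n[i] modulo
// 257; the final ternary maps the coefficient into the balanced range
// -128..+128.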
  8264.  
  8265. A0 ^= hash->h4[0];
  8266. A1 ^= hash->h4[1];
  8267. A2 ^= hash->h4[2];
  8268. A3 ^= hash->h4[3];
  8269. A4 ^= hash->h4[4];
  8270. A5 ^= hash->h4[5];
  8271. A6 ^= hash->h4[6];
  8272. A7 ^= hash->h4[7];
  8273. B0 ^= hash->h4[8];
  8274. B1 ^= hash->h4[9];
  8275. B2 ^= hash->h4[10];
  8276. B3 ^= hash->h4[11];
  8277. B4 ^= hash->h4[12];
  8278. B5 ^= hash->h4[13];
  8279. B6 ^= hash->h4[14];
  8280. B7 ^= hash->h4[15];
  8281.  
  8282. ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27);
  8283. ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7);
  8284. ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5);
  8285. ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25);
  8286.  
  8287. STEP_BIG(
  8288. C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC),
  8289. C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558),
  8290. IF, 4, 13, PP8_4_);
  8291. STEP_BIG(
  8292. C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F),
  8293. C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E),
  8294. IF, 13, 10, PP8_5_);
  8295. STEP_BIG(
  8296. C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8),
  8297. C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257),
  8298. IF, 10, 25, PP8_6_);
  8299. STEP_BIG(
  8300. C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4),
  8301. C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22),
  8302. IF, 25, 4, PP8_0_);
  8303.  
  8304. u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7;
  8305. u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7;
  8306. u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7;
  8307. u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7;
  8308.  
  8309. #define q SIMD_Q
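// Final compression pass: the q[] accesses inside the round macros are
// redirected to the precomputed constant table SIMD_Q (defined earlier in
// this file), and A0 is XORed below with the message bit length
// (0x200 = 512) before the last four rounds and the feed-forward steps.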
  8310.  
  8311. A0 ^= 0x200;
  8312.  
  8313. ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27);
  8314. ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7);
  8315. ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5);
  8316. ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25);
  8317. STEP_BIG(
  8318. COPY_A0, COPY_A1, COPY_A2, COPY_A3,
  8319. COPY_A4, COPY_A5, COPY_A6, COPY_A7,
  8320. IF, 4, 13, PP8_4_);
  8321. STEP_BIG(
  8322. COPY_B0, COPY_B1, COPY_B2, COPY_B3,
  8323. COPY_B4, COPY_B5, COPY_B6, COPY_B7,
  8324. IF, 13, 10, PP8_5_);
  8325. STEP_BIG(
  8326. COPY_C0, COPY_C1, COPY_C2, COPY_C3,
  8327. COPY_C4, COPY_C5, COPY_C6, COPY_C7,
  8328. IF, 10, 25, PP8_6_);
  8329. STEP_BIG(
  8330. COPY_D0, COPY_D1, COPY_D2, COPY_D3,
  8331. COPY_D4, COPY_D5, COPY_D6, COPY_D7,
  8332. IF, 25, 4, PP8_0_);
  8333. #undef q
  8334.  
  8335. hash->h4[0] = A0;
  8336. hash->h4[1] = A1;
  8337. hash->h4[2] = A2;
  8338. hash->h4[3] = A3;
  8339. hash->h4[4] = A4;
  8340. hash->h4[5] = A5;
  8341. hash->h4[6] = A6;
  8342. hash->h4[7] = A7;
  8343. hash->h4[8] = B0;
  8344. hash->h4[9] = B1;
  8345. hash->h4[10] = B2;
  8346. hash->h4[11] = B3;
  8347. hash->h4[12] = B4;
  8348. hash->h4[13] = B5;
  8349. hash->h4[14] = B6;
  8350. hash->h4[15] = B7;
  8351.  
  8352. barrier(CLK_GLOBAL_MEM_FENCE);
  8353. }
  8354.  
  8355. __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
  8356. __kernel void echo(volatile __global hash_t* hashes, volatile __global uint* output, const ulong target)
  8357. {
  8358. uint gid = get_global_id(0);
  8359. hash_t hash;
  8360.  
  8361. __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
  8362. int init = get_local_id(0);
  8363. int step = get_local_size(0);
  8364. for (int i = init; i < 256; i += step)
  8365. {
  8366. AES0[i] = AES0_C[i];
  8367. AES1[i] = AES1_C[i];
  8368. AES2[i] = AES2_C[i];
  8369. AES3[i] = AES3_C[i];
  8370. }
  8371. barrier(CLK_LOCAL_MEM_FENCE);
  8372.  
  8373. // copies hashes to "hash"
  8374. uint offset = get_global_offset(0);
  8375. for (int i = 0; i < 8; i++) {
  8376. hash.h8[i] = hashes[gid-offset].h8[i];
  8377. }
  8378.  
  8379. // echo
  8380. sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1;
  8381. sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71;
  8382. Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL;
  8383. Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0;
  8384.  
  8385. sph_u32 K0 = 512;
  8386. sph_u32 K1 = 0;
  8387. sph_u32 K2 = 0;
  8388. sph_u32 K3 = 0;
  8389.  
  8390. W00 = Vb00;
  8391. W01 = Vb01;
  8392. W10 = Vb10;
  8393. W11 = Vb11;
  8394. W20 = Vb20;
  8395. W21 = Vb21;
  8396. W30 = Vb30;
  8397. W31 = Vb31;
  8398. W40 = Vb40;
  8399. W41 = Vb41;
  8400. W50 = Vb50;
  8401. W51 = Vb51;
  8402. W60 = Vb60;
  8403. W61 = Vb61;
  8404. W70 = Vb70;
  8405. W71 = Vb71;
  8406. W80 = hash.h8[0];
  8407. W81 = hash.h8[1];
  8408. W90 = hash.h8[2];
  8409. W91 = hash.h8[3];
  8410. WA0 = hash.h8[4];
  8411. WA1 = hash.h8[5];
  8412. WB0 = hash.h8[6];
  8413. WB1 = hash.h8[7];
  8414. WC0 = 0x80;
  8415. WC1 = 0;
  8416. WD0 = 0;
  8417. WD1 = 0;
  8418. WE0 = 0;
  8419. WE1 = 0x200000000000000;
  8420. WF0 = 0x200;
  8421. WF1 = 0;
  8422.  
  8423. for (unsigned u = 0; u < 10; u ++) {
  8424. BIG_ROUND;
  8425. }
  8426.  
  8427. Vb00 ^= hash.h8[0] ^ W00 ^ W80;
  8428. Vb01 ^= hash.h8[1] ^ W01 ^ W81;
  8429. Vb10 ^= hash.h8[2] ^ W10 ^ W90;
  8430. Vb11 ^= hash.h8[3] ^ W11 ^ W91;
  8431. Vb20 ^= hash.h8[4] ^ W20 ^ WA0;
  8432. Vb21 ^= hash.h8[5] ^ W21 ^ WA1;
  8433. Vb30 ^= hash.h8[6] ^ W30 ^ WB0;
  8434. Vb31 ^= hash.h8[7] ^ W31 ^ WB1;
  8435.  
  8436. bool result = (Vb11 <= target);
  8437. if (result)
  8438. output[atomic_inc(output+0xFF)] = SWAP4(gid);
  8439.  
  8440. barrier(CLK_GLOBAL_MEM_FENCE);
  8441. }
  8442.  
  8443. #endif // DARKCOIN_CL