/*
 * Provenance: Pastebin paste "Untitled", posted by a guest on Dec 13th, 2011
 * (plain text, 9.45 KB). The Pastebin page header (advertisement and
 * sign-up banner) was web-page residue and is not part of the source.
 */
  1. #ifndef OLD_ATI
  2. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  3. #endif
  4.  
  5.  
  6. __kernel void
  7. __attribute__((reqd_work_group_size(64, 1, 1)))
  8. sha256_long( __global uint4 *dst, uint4 input, uint size, uint8 chbase, __global uint *found_ind, __global uint *bitmaps, __global uint *found, __global uint *table, uint4 singlehash)
  9. {
  10.  
  11. uint4 w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11,w12,w13,w14,w16;
  12.  
  13. uint i,ib,ic,id;
  14. uint4 A,B,C,D,E,F,G,H,K,l,tmp1,tmp2,temp, SIZE;
  15. uint b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16;
  16. uint4 m= 0x00FF00FF;
  17. uint4 m2= 0xFF00FF00;
  18.  
  19. #define H0 0x6A09E667
  20. #define H1 0xBB67AE85
  21. #define H2 0x3C6EF372
  22. #define H3 0xA54FF53A
  23. #define H4 0x510E527F
  24. #define H5 0x9B05688C
  25. #define H6 0x1F83D9AB
  26. #define H7 0x5BE0CD19
  27.  
  28. #define Sl 8
  29. #define Sr 24
  30.  
  31.  
  32. uint4 chbase1=(uint4)(chbase.s0,chbase.s1,chbase.s2,chbase.s3);
  33.  
  34. ic = size+3;
  35. id = ic<<3;
  36. SIZE = (uint4)id;
  37.  
  38. w1 = (uint4)input.y;
  39. w2 = (uint4)input.z;
  40. w3 = (uint4)input.w;
  41.  
  42. i = table[get_global_id(0)];
  43. ib = (uint)i&255;
  44. ic = (uint)((i>>8)&255);
  45. id = (uint)((i>>16)&255);
  46. w0 = (uint4)(ib|(ic<<8)|(id<<16)|(chbase1<<24));
  47.  
  48.  
  49. w4=(uint4)0;
  50. w5=(uint4)0;
  51. w6=(uint4)0;
  52. w7=(uint4)0;
  53. w8=(uint4)0;
  54. w9=(uint4)0;
  55. w10=(uint4)0;
  56. w11=(uint4)0;
  57. w12=(uint4)0;
  58. w13=(uint4)0;
  59. w14=(uint4)0;
  60. w16=(uint4)0;
  61.  
  62. // Fix BFI_INT for Endian_Reverse32
  63. m+=w4;
  64. m2+=w4;
  65.  
  66. #ifdef OLD_ATI
  67. #define Endian_Reverse32(a) { l=(a);tmp1=rotate(l,Sl);tmp2=rotate(l,Sr); (a)=(tmp1 & m)|(tmp2 & m2); }
  68. #else
  69. #define Endian_Reverse32(a) { l=(a);tmp1=rotate(l,Sl);tmp2=rotate(l,Sr); (a)=amd_bytealign(m,tmp1,tmp2); }
  70. #endif
  71.  
  72. A=(uint4)H0;
  73. B=(uint4)H1;
  74. C=(uint4)H2;
  75. D=(uint4)H3;
  76. E=(uint4)H4;
  77. F=(uint4)H5;
  78. G=(uint4)H6;
  79. H=(uint4)H7;
  80.  
  81.  
  82. Endian_Reverse32(w0);
  83. //Endian_Reverse32(w1);
  84. //Endian_Reverse32(w2);
  85. //Endian_Reverse32(w3);
  86.  
  87. #define SHR(x,n) ((x) >> n)
  88. #define ROTR(x,n) (rotate(x,(32-n)))
  89.  
  90. #define S0(x) (ROTR(x, 7U) ^ SHR(x, 3U)^ ROTR(x,18U) )
  91. #define S1(x) (ROTR(x,17U) ^ SHR(x,10U)^ ROTR(x,19U) )
  92. #define S2(x) (ROTR(x, 2U) ^ ROTR(x,22U)^ ROTR(x,13U) )
  93. #define S3(x) (ROTR(x, 6U) ^ ROTR(x,25U)^ ROTR(x,11U) )
  94.  
  95. #ifdef OLD_ATI
  96. #define F1(x,y,z) (z ^ (x & (y ^ z)))
  97. #define F00(x,y,z) ((x & y) | (z & (x | y)))
  98. #define F0(x,y,z) ((x & y) | (z & (x | y)))
  99. #else
  100. #define F1(x,y,z) (amd_bytealign(x,y,z))
  101. #define F0(x,y,z) (amd_bytealign(x, (z | y), (y & z)))
  102. #define F00(x,y,z) ((x & y) | (z & (x | y)))
  103. #endif
  104.  
  105. #define P(a,b,c,d,e,f,g,h,x,K) {tmp1 = F1(e,f,g) + S3(e) + h + K +x;tmp2 = F0(a,b,c) + S2(a);d += tmp1; h = tmp1 + tmp2;}
  106. #define P0(a,b,c,d,e,f,g,h,K) {tmp1 = S3(e) + F1(e,f,g) + h + K;tmp2 = S2(a) + F0(a,b,c);d += tmp1; h = tmp1 + tmp2;}
  107. #define PI(a,b,c,d,e,f,g,h,x,K) {tmp1 = h + S3(e) + F1(e,f,g) + K + x;tmp2 = S2(a) + F00(a,b,c);d += tmp1; h = tmp1 + tmp2;}
  108.  
  109.  
  110.  
  111. PI(A, B, C, D, E, F, G, H, w0, 0x428A2F98);
  112. P(H, A, B, C, D, E, F, G, w1, 0x71374491);
  113. P(G, H, A, B, C, D, E, F, w2, 0xB5C0FBCF);
  114. P(F, G, H, A, B, C, D, E, w3, 0xE9B5DBA5);
  115. P0(E, F, G, H, A, B, C, D, 0x3956C25B);
  116. P0(D, E, F, G, H, A, B, C, 0x59F111F1);
  117. P0(C, D, E, F, G, H, A, B, 0x923F82A4);
  118. P0(B, C, D, E, F, G, H, A, 0xAB1C5ED5);
  119. P0(A, B, C, D, E, F, G, H, 0xD807AA98);
  120. P0(H, A, B, C, D, E, F, G, 0x12835B01);
  121. P0(G, H, A, B, C, D, E, F, 0x243185BE);
  122. P0(F, G, H, A, B, C, D, E, 0x550C7DC3);
  123. P0(E, F, G, H, A, B, C, D, 0x72BE5D74);
  124. P0(D, E, F, G, H, A, B, C, 0x80DEB1FE);
  125. P0(C, D, E, F, G, H, A, B, 0x9BDC06A7);
  126. P(B, C, D, E, F, G, H, A, SIZE, 0xC19BF174);
  127. w16=S0(w1)+w0; P(A, B, C, D, E, F, G, H, w16, 0xE49B69C1);
  128. w0=S1(SIZE)+S0(w2)+w1; P(H, A, B, C, D, E, F, G, w0, 0xEFBE4786);
  129. w1=S1(w16)+S0(w3)+w2; P(G, H, A, B, C, D, E, F, w1, 0x0FC19DC6);
  130. w2=S1(w0)+S0(w4)+w3; P(F, G, H, A, B, C, D, E, w2, 0x240CA1CC);
  131. w3=S1(w1)+w13+S0(w5)+w4; P(E, F, G, H, A, B, C, D, w3, 0x2DE92C6F);
  132. w4=S1(w2)+w14+S0(w6)+w5; P(D, E, F, G, H, A, B, C, w4, 0x4A7484AA);
  133. w5=S1(w3)+SIZE+S0(w7)+w6; P(C, D, E, F, G, H, A, B, w5, 0x5CB0A9DC);
  134. w6=S1(w4)+w16+S0(w8)+w7; P(B, C, D, E, F, G, H, A, w6, 0x76F988DA);
  135. w7=S1(w5)+w0+S0(w9)+w8; P(A, B, C, D, E, F, G, H, w7, 0x983E5152);
  136. w8=S1(w6)+w1+S0(w10)+w9; P(H, A, B, C, D, E, F, G, w8, 0xA831C66D);
  137. w9=S1(w7)+w2+S0(w11)+w10; P(G, H, A, B, C, D, E, F, w9, 0xB00327C8);
  138. w10=S1(w8)+w3+S0(w12)+w11; P(F, G, H, A, B, C, D, E, w10, 0xBF597FC7);
  139. w11=S1(w9)+w4+S0(w13)+w12; P(E, F, G, H, A, B, C, D, w11, 0xC6E00BF3);
  140. w12=S1(w10)+w5+S0(w14)+w13; P(D, E, F, G, H, A, B, C, w12, 0xD5A79147);
  141. w13=S1(w11)+w6+S0(SIZE)+w14; P(C, D, E, F, G, H, A, B, w13, 0x06CA6351);
  142. w14=S1(w12)+w7+S0(w16)+SIZE; P(B, C, D, E, F, G, H, A, w14, 0x14292967);
  143. SIZE=S1(w13)+w8+S0(w0)+w16; P(A, B, C, D, E, F, G, H, SIZE, 0x27B70A85);
  144. w16=S1(w14)+w9+S0(w1)+w0; P(H, A, B, C, D, E, F, G, w16, 0x2E1B2138);
  145. w0=S1(SIZE)+w10+S0(w2)+w1; P(G, H, A, B, C, D, E, F, w0, 0x4D2C6DFC);
  146. w1=S1(w16)+w11+S0(w3)+w2; P(F, G, H, A, B, C, D, E, w1, 0x53380D13);
  147. w2=S1(w0)+w12+S0(w4)+w3; P(E, F, G, H, A, B, C, D, w2, 0x650A7354);
  148. w3=S1(w1)+w13+S0(w5)+w4; P(D, E, F, G, H, A, B, C, w3, 0x766A0ABB);
  149. w4=S1(w2)+w14+S0(w6)+w5; P(C, D, E, F, G, H, A, B, w4, 0x81C2C92E);
  150. w5=S1(w3)+SIZE+S0(w7)+w6; P(B, C, D, E, F, G, H, A, w5, 0x92722C85);
  151. w6=S1(w4)+w16+S0(w8)+w7; P(A, B, C, D, E, F, G, H, w6, 0xA2BFE8A1);
  152. w7=S1(w5)+w0+S0(w9)+w8; P(H, A, B, C, D, E, F, G, w7, 0xA81A664B);
  153. w8=S1(w6)+w1+S0(w10)+w9; P(G, H, A, B, C, D, E, F, w8, 0xC24B8B70);
  154. w9=S1(w7)+w2+S0(w11)+w10; P(F, G, H, A, B, C, D, E, w9, 0xC76C51A3);
  155. w10=S1(w8)+w3+S0(w12)+w11; P(E, F, G, H, A, B, C, D, w10, 0xD192E819);
  156. w11=S1(w9)+w4+S0(w13)+w12; P(D, E, F, G, H, A, B, C, w11, 0xD6990624);
  157. w12=S1(w10)+w5+S0(w14)+w13; P(C, D, E, F, G, H, A, B, w12, 0xF40E3585);
  158. w13=S1(w11)+w6+S0(SIZE)+w14; P(B, C, D, E, F, G, H, A, w13, 0x106AA070);
  159. w14=S1(w12)+w7+S0(w16)+SIZE; P(A, B, C, D, E, F, G, H, w14, 0x19A4C116);
  160. SIZE=S1(w13)+w8+S0(w0)+w16; P(H, A, B, C, D, E, F, G, SIZE, 0x1E376C08);
  161. w16=S1(w14)+w9+S0(w1)+w0; P(G, H, A, B, C, D, E, F, w16, 0x2748774C);
  162. w0=S1(SIZE)+w10+S0(w2)+w1; P(F, G, H, A, B, C, D, E, w0, 0x34B0BCB5);
  163. w1=S1(w16)+w11+S0(w3)+w2; P(E, F, G, H, A, B, C, D, w1, 0x391C0CB3);
  164. w2=S1(w0)+w12+S0(w4)+w3; P(D, E, F, G, H, A, B, C, w2, 0x4ED8AA4A);
  165. w3=S1(w1)+w13+S0(w5)+w4; P(C, D, E, F, G, H, A, B, w3, 0x5B9CCA4F);
  166. w4=S1(w2)+w14+S0(w6)+w5; P(B, C, D, E, F, G, H, A, w4, 0x682E6FF3);
  167. w5=S1(w3)+SIZE+S0(w7)+w6; P(A, B, C, D, E, F, G, H, w5, 0x748F82EE);
  168. w6=S1(w4)+w16+S0(w8)+w7; P(H, A, B, C, D, E, F, G, w6, 0x78A5636F);
  169. w7=S1(w5)+w0+S0(w9)+w8; P(G, H, A, B, C, D, E, F, w7, 0x84C87814);
  170. w8=S1(w6)+w1+S0(w10)+w9; P(F, G, H, A, B, C, D, E, w8, 0x8CC70208);
  171. w9=S1(w7)+w2+S0(w11)+w10; P(E, F, G, H, A, B, C, D, w9, 0x90BEFFFA);
  172. w10=S1(w8)+w3+S0(w12)+w11; P(D, E, F, G, H, A, B, C, w10, 0xA4506CEB);
  173. #ifdef SINGLE_MODE
  174. if (all((uint4)singlehash.w != D)) return;
  175. #endif
  176. w11=S1(w9)+w4+S0(w13)+w12; P(C, D, E, F, G, H, A, B, w11, 0xBEF9A3F7);
  177. w12=S1(w10)+w5+S0(w14)+w13; P(B, C, D, E, F, G, H, A, w12, 0xC67178F2);
  178.  
  179.  
  180.  
  181. #ifdef SINGLE_MODE
  182. id=0;
  183. if ((singlehash.x==A.s0)&&(singlehash.y==B.s0)&&(singlehash.z==C.s0)) id = 1;
  184. if ((singlehash.x==A.s1)&&(singlehash.y==B.s1)&&(singlehash.z==C.s1)) id = 1;
  185. if ((singlehash.x==A.s2)&&(singlehash.y==B.s2)&&(singlehash.z==C.s2)) id = 1;
  186. if ((singlehash.x==A.s3)&&(singlehash.y==B.s3)&&(singlehash.z==C.s3)) id = 1;
  187. if (id==0) return;
  188. #endif
  189.  
  190.  
  191. A=A+(uint4)H0;
  192. B=B+(uint4)H1;
  193. C=C+(uint4)H2;
  194. D=D+(uint4)H3;
  195. E=E+(uint4)H4;
  196. F=F+(uint4)H5;
  197. G=G+(uint4)H6;
  198. H=H+(uint4)H7;
  199.  
  200.  
  201.  
  202. Endian_Reverse32(A);
  203. Endian_Reverse32(B);
  204. Endian_Reverse32(C);
  205. Endian_Reverse32(D);
  206. Endian_Reverse32(E);
  207. Endian_Reverse32(F);
  208. Endian_Reverse32(G);
  209. Endian_Reverse32(H);
  210.  
  211.  
  212.  
  213. #ifndef SINGLE_MODE
  214. id=0;
  215. b1=A.s0;b2=B.s0;b3=C.s0;b4=D.s0;
  216. b5=(singlehash.x >> (B.s0&31))&1;
  217. b6=(singlehash.y >> (C.s0&31))&1;
  218. b7=(singlehash.z >> (D.s0&31))&1;
  219. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && (
  220. (bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  221. b1=A.s1;b2=B.s1;b3=C.s1;b4=D.s1;
  222. b5=(singlehash.x >> (B.s1&31))&1;
  223. b6=(singlehash.y >> (C.s1&31))&1;
  224. b7=(singlehash.z >> (D.s1&31))&1;
  225. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && (
  226. (bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  227. b1=A.s2;b2=B.s2;b3=C.s2;b4=D.s2;
  228. b5=(singlehash.x >> (B.s2&31))&1;
  229. b6=(singlehash.y >> (C.s2&31))&1;
  230. b7=(singlehash.z >> (D.s2&31))&1;
  231. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && ((bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  232. b1=A.s3;b2=B.s3;b3=C.s3;b4=D.s3;
  233. b5=(singlehash.x >> (B.s3&31))&1;
  234. b6=(singlehash.y >> (C.s3&31))&1;
  235. b7=(singlehash.z >> (D.s3&31))&1;
  236. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && ((bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  237. if (id==0) return;
  238. #endif
  239.  
  240.  
  241.  
  242. if (id==1)
  243. {
  244. found[0] = 1;
  245. found_ind[get_global_id(0)] = 1;
  246. }
  247.  
  248. dst[(get_global_id(0)*8)] = (uint4)(A.s0,B.s0,C.s0,D.s0);
  249. dst[(get_global_id(0)*8)+1] = (uint4)(E.s0,F.s0,G.s0,H.s0);
  250. dst[(get_global_id(0)*8)+2] = (uint4)(A.s1,B.s1,C.s1,D.s1);
  251. dst[(get_global_id(0)*8)+3] = (uint4)(E.s1,F.s1,G.s1,H.s1);
  252. dst[(get_global_id(0)*8)+4] = (uint4)(A.s2,B.s2,C.s2,D.s2);
  253. dst[(get_global_id(0)*8)+5] = (uint4)(E.s2,F.s2,G.s2,H.s2);
  254. dst[(get_global_id(0)*8)+6] = (uint4)(A.s3,B.s3,C.s3,D.s3);
  255. dst[(get_global_id(0)*8)+7] = (uint4)(E.s3,F.s3,G.s3,H.s3);
  256.  
  257.  
  258. }
  259.  
  260.  
/* End of paste. The Pastebin page footer (advertisement and comment prompt) was web-page residue and is not part of the source. */