Guest User

Untitled

a guest
Dec 13th, 2011
177
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Unless building for OLD_ATI, enable the cl_amd_media_ops extension; the
  // non-OLD_ATI macro variants further down call amd_bytealign.
  1. #ifndef OLD_ATI
  2. #pragma OPENCL EXTENSION cl_amd_media_ops : enable
  3. #endif
  4.  
  5.  
  // sha256_long: fully unrolled 64-round SHA-256 compression of one message
  // block, evaluated for four candidates at once (one candidate per uint4 lane).
  //
  // Parameters, as they are used in the body:
  //   dst        - output; work-item g writes dst[g*8 .. g*8+7] (two uint4 per lane).
  //   input      - .y/.z/.w are broadcast into message words 1..3; .x is not read.
  //   size       - (size+3)*8 is broadcast into the 16th message word.
  //   chbase     - .s0-.s3 supply the top byte of word 0 for lanes 0-3; .s4-.s7 are not read.
  //   found_ind  - found_ind[g] is set to 1 when this work-item reports a hit.
  //   bitmaps    - bit tables consulted only when SINGLE_MODE is not defined.
  //   found      - found[0] is set to 1 when this work-item reports a hit.
  //   table      - table[g] supplies the low three bytes of word 0.
  //   singlehash - exact-match target when SINGLE_MODE is defined, otherwise a
  //                3 x 32-bit pre-filter that gates the bitmap lookups.
  //
  // The kernel must be launched with a work-group size of exactly 64x1x1.
  6. __kernel void
  7. __attribute__((reqd_work_group_size(64, 1, 1)))
  8. sha256_long( __global uint4 *dst, uint4 input, uint size, uint8 chbase, __global uint *found_ind, __global uint *bitmaps, __global uint *found, __global uint *table, uint4 singlehash)
  9. {
  10.  
  // Message words. w16 and SIZE act as two extra slots so that the schedule in
  // rounds 16-63 can be expanded in place (see the comment above those rounds).
  11. uint4 w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,w11,w12,w13,w14,w16;
  12.  
  // Of the locals below, K, temp and b8..b16 are declared but never referenced
  // (the K that appears inside the round macros is a macro parameter).
  13. uint i,ib,ic,id;
  14. uint4 A,B,C,D,E,F,G,H,K,l,tmp1,tmp2,temp, SIZE;
  15. uint b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16;
  // Byte-lane masks used by Endian_Reverse32.
  16. uint4 m= 0x00FF00FF;
  17. uint4 m2= 0xFF00FF00;
  18.  
  // SHA-256 initial hash values.
  19. #define H0 0x6A09E667
  20. #define H1 0xBB67AE85
  21. #define H2 0x3C6EF372
  22. #define H3 0xA54FF53A
  23. #define H4 0x510E527F
  24. #define H5 0x9B05688C
  25. #define H6 0x1F83D9AB
  26. #define H7 0x5BE0CD19
  27.  
  // Left-rotation amounts used by Endian_Reverse32.
  28. #define Sl 8
  29. #define Sr 24
  30.  
  31.  
  // Only the first four components of chbase are used, one per lane.
  32. uint4 chbase1=(uint4)(chbase.s0,chbase.s1,chbase.s2,chbase.s3);
  33.  
  // ic and id are scratch here and are reassigned below.
  // NOTE(review): (size+3)*8 is presumably the message length in bits - confirm against the host code.
  34. ic = size+3;
  35. id = ic<<3;
  36. SIZE = (uint4)id;
  37.  
  // Words 1..3 are identical across the four lanes.
  38. w1 = (uint4)input.y;
  39. w2 = (uint4)input.z;
  40. w3 = (uint4)input.w;
  41.  
  // Word 0: low three bytes come from table[global id], the top byte differs
  // per lane and comes from chbase1.
  42. i = table[get_global_id(0)];
  43. ib = (uint)i&255;
  44. ic = (uint)((i>>8)&255);
  45. id = (uint)((i>>16)&255);
  46. w0 = (uint4)(ib|(ic<<8)|(id<<16)|(chbase1<<24));
  47.  
  48.  
  // Words 4..14 (and the spare slot w16) start out as zero.
  49. w4=(uint4)0;
  50. w5=(uint4)0;
  51. w6=(uint4)0;
  52. w7=(uint4)0;
  53. w8=(uint4)0;
  54. w9=(uint4)0;
  55. w10=(uint4)0;
  56. w11=(uint4)0;
  57. w12=(uint4)0;
  58. w13=(uint4)0;
  59. w14=(uint4)0;
  60. w16=(uint4)0;
  61.  
  62. // Fix BFI_INT for Endian_Reverse32
  // w4 is zero at this point, so these additions leave m and m2 numerically unchanged.
  // NOTE(review): presumably they keep the compiler from treating the masks as
  // literal constants - confirm before removing.
  63. m+=w4;
  64. m2+=w4;
  65.  
  // Endian_Reverse32(a): byte-swap every 32-bit component of a in place,
  // clobbering l, tmp1 and tmp2. The OLD_ATI form does it with two rotates and
  // the two byte-lane masks.
  // NOTE(review): the non-OLD_ATI form only matches the OLD_ATI form if
  // amd_bytealign(m,x,y) behaves as a bit-select ((x & m) | (y & ~m)); presumably
  // the compiled binary is patched to BFI_INT as the comment above hints - confirm.
  66. #ifdef OLD_ATI
  67. #define Endian_Reverse32(a) { l=(a);tmp1=rotate(l,Sl);tmp2=rotate(l,Sr); (a)=(tmp1 & m)|(tmp2 & m2); }
  68. #else
  69. #define Endian_Reverse32(a) { l=(a);tmp1=rotate(l,Sl);tmp2=rotate(l,Sr); (a)=amd_bytealign(m,tmp1,tmp2); }
  70. #endif
  71.  
  // Load the initial hash values into the eight working registers.
  72. A=(uint4)H0;
  73. B=(uint4)H1;
  74. C=(uint4)H2;
  75. D=(uint4)H3;
  76. E=(uint4)H4;
  77. F=(uint4)H5;
  78. G=(uint4)H6;
  79. H=(uint4)H7;
  80.  
  81.  
  // Only w0 is byte-swapped here.
  // NOTE(review): w1..w3 are presumably supplied by the host already in the
  // byte order the rounds expect - confirm.
  82. Endian_Reverse32(w0);
  83. //Endian_Reverse32(w1);
  84. //Endian_Reverse32(w2);
  85. //Endian_Reverse32(w3);
  86.  
  // ROTR is expressed as a left rotate by (32-n).
  87. #define SHR(x,n) ((x) >> n)
  88. #define ROTR(x,n) (rotate(x,(32-n)))
  89.  
  // S0/S1: message-schedule sigma functions. S2/S3: round Sigma functions
  // applied to the a and e registers respectively.
  90. #define S0(x) (ROTR(x, 7U) ^ SHR(x, 3U)^ ROTR(x,18U) )
  91. #define S1(x) (ROTR(x,17U) ^ SHR(x,10U)^ ROTR(x,19U) )
  92. #define S2(x) (ROTR(x, 2U) ^ ROTR(x,22U)^ ROTR(x,13U) )
  93. #define S3(x) (ROTR(x, 6U) ^ ROTR(x,25U)^ ROTR(x,11U) )
  94.  
  // F1 is the choose function and F0/F00 the majority function in the OLD_ATI
  // branch. In the other branch F1 and F0 go through amd_bytealign (see the
  // NOTE(review) on Endian_Reverse32), while F00 stays in plain bitwise form.
  95. #ifdef OLD_ATI
  96. #define F1(x,y,z) (z ^ (x & (y ^ z)))
  97. #define F00(x,y,z) ((x & y) | (z & (x | y)))
  98. #define F0(x,y,z) ((x & y) | (z & (x | y)))
  99. #else
  100. #define F1(x,y,z) (amd_bytealign(x,y,z))
  101. #define F0(x,y,z) (amd_bytealign(x, (z | y), (y & z)))
  102. #define F00(x,y,z) ((x & y) | (z & (x | y)))
  103. #endif
  104.  
  // One compression round; each variant updates d and h and clobbers tmp1/tmp2.
  //   P  - round with message word x and round constant K.
  //   P0 - same without a message word (used where the word is zero).
  //   PI - same as P but uses F00 instead of F0 (used for the first round only).
  105. #define P(a,b,c,d,e,f,g,h,x,K) {tmp1 = F1(e,f,g) + S3(e) + h + K +x;tmp2 = F0(a,b,c) + S2(a);d += tmp1; h = tmp1 + tmp2;}
  106. #define P0(a,b,c,d,e,f,g,h,K) {tmp1 = S3(e) + F1(e,f,g) + h + K;tmp2 = S2(a) + F0(a,b,c);d += tmp1; h = tmp1 + tmp2;}
  107. #define PI(a,b,c,d,e,f,g,h,x,K) {tmp1 = h + S3(e) + F1(e,f,g) + K + x;tmp2 = S2(a) + F00(a,b,c);d += tmp1; h = tmp1 + tmp2;}
  108.  
  109.  
  110.  
  // Rounds 0-15. The working registers are rotated by permuting the macro
  // arguments from round to round instead of moving data. Rounds 4-14 use P0
  // because w4..w14 are zero; round 15 consumes SIZE as the 16th message word.
  111. PI(A, B, C, D, E, F, G, H, w0, 0x428A2F98);
  112. P(H, A, B, C, D, E, F, G, w1, 0x71374491);
  113. P(G, H, A, B, C, D, E, F, w2, 0xB5C0FBCF);
  114. P(F, G, H, A, B, C, D, E, w3, 0xE9B5DBA5);
  115. P0(E, F, G, H, A, B, C, D, 0x3956C25B);
  116. P0(D, E, F, G, H, A, B, C, 0x59F111F1);
  117. P0(C, D, E, F, G, H, A, B, 0x923F82A4);
  118. P0(B, C, D, E, F, G, H, A, 0xAB1C5ED5);
  119. P0(A, B, C, D, E, F, G, H, 0xD807AA98);
  120. P0(H, A, B, C, D, E, F, G, 0x12835B01);
  121. P0(G, H, A, B, C, D, E, F, 0x243185BE);
  122. P0(F, G, H, A, B, C, D, E, 0x550C7DC3);
  123. P0(E, F, G, H, A, B, C, D, 0x72BE5D74);
  124. P0(D, E, F, G, H, A, B, C, 0x80DEB1FE);
  125. P0(C, D, E, F, G, H, A, B, 0x9BDC06A7);
  126. P(B, C, D, E, F, G, H, A, SIZE, 0xC19BF174);
  // Rounds 16-63. Each line first derives the next schedule word and then runs
  // the round with it. The schedule is expanded in place: the 17 names
  // w16, w0..w14, SIZE are reused cyclically to hold the newest word, so the
  // statement order here must not be changed. The first few lines drop terms
  // whose source words are still zero.
  127. w16=S0(w1)+w0; P(A, B, C, D, E, F, G, H, w16, 0xE49B69C1);
  128. w0=S1(SIZE)+S0(w2)+w1; P(H, A, B, C, D, E, F, G, w0, 0xEFBE4786);
  129. w1=S1(w16)+S0(w3)+w2; P(G, H, A, B, C, D, E, F, w1, 0x0FC19DC6);
  130. w2=S1(w0)+S0(w4)+w3; P(F, G, H, A, B, C, D, E, w2, 0x240CA1CC);
  131. w3=S1(w1)+w13+S0(w5)+w4; P(E, F, G, H, A, B, C, D, w3, 0x2DE92C6F);
  132. w4=S1(w2)+w14+S0(w6)+w5; P(D, E, F, G, H, A, B, C, w4, 0x4A7484AA);
  133. w5=S1(w3)+SIZE+S0(w7)+w6; P(C, D, E, F, G, H, A, B, w5, 0x5CB0A9DC);
  134. w6=S1(w4)+w16+S0(w8)+w7; P(B, C, D, E, F, G, H, A, w6, 0x76F988DA);
  135. w7=S1(w5)+w0+S0(w9)+w8; P(A, B, C, D, E, F, G, H, w7, 0x983E5152);
  136. w8=S1(w6)+w1+S0(w10)+w9; P(H, A, B, C, D, E, F, G, w8, 0xA831C66D);
  137. w9=S1(w7)+w2+S0(w11)+w10; P(G, H, A, B, C, D, E, F, w9, 0xB00327C8);
  138. w10=S1(w8)+w3+S0(w12)+w11; P(F, G, H, A, B, C, D, E, w10, 0xBF597FC7);
  139. w11=S1(w9)+w4+S0(w13)+w12; P(E, F, G, H, A, B, C, D, w11, 0xC6E00BF3);
  140. w12=S1(w10)+w5+S0(w14)+w13; P(D, E, F, G, H, A, B, C, w12, 0xD5A79147);
  141. w13=S1(w11)+w6+S0(SIZE)+w14; P(C, D, E, F, G, H, A, B, w13, 0x06CA6351);
  142. w14=S1(w12)+w7+S0(w16)+SIZE; P(B, C, D, E, F, G, H, A, w14, 0x14292967);
  143. SIZE=S1(w13)+w8+S0(w0)+w16; P(A, B, C, D, E, F, G, H, SIZE, 0x27B70A85);
  144. w16=S1(w14)+w9+S0(w1)+w0; P(H, A, B, C, D, E, F, G, w16, 0x2E1B2138);
  145. w0=S1(SIZE)+w10+S0(w2)+w1; P(G, H, A, B, C, D, E, F, w0, 0x4D2C6DFC);
  146. w1=S1(w16)+w11+S0(w3)+w2; P(F, G, H, A, B, C, D, E, w1, 0x53380D13);
  147. w2=S1(w0)+w12+S0(w4)+w3; P(E, F, G, H, A, B, C, D, w2, 0x650A7354);
  148. w3=S1(w1)+w13+S0(w5)+w4; P(D, E, F, G, H, A, B, C, w3, 0x766A0ABB);
  149. w4=S1(w2)+w14+S0(w6)+w5; P(C, D, E, F, G, H, A, B, w4, 0x81C2C92E);
  150. w5=S1(w3)+SIZE+S0(w7)+w6; P(B, C, D, E, F, G, H, A, w5, 0x92722C85);
  151. w6=S1(w4)+w16+S0(w8)+w7; P(A, B, C, D, E, F, G, H, w6, 0xA2BFE8A1);
  152. w7=S1(w5)+w0+S0(w9)+w8; P(H, A, B, C, D, E, F, G, w7, 0xA81A664B);
  153. w8=S1(w6)+w1+S0(w10)+w9; P(G, H, A, B, C, D, E, F, w8, 0xC24B8B70);
  154. w9=S1(w7)+w2+S0(w11)+w10; P(F, G, H, A, B, C, D, E, w9, 0xC76C51A3);
  155. w10=S1(w8)+w3+S0(w12)+w11; P(E, F, G, H, A, B, C, D, w10, 0xD192E819);
  156. w11=S1(w9)+w4+S0(w13)+w12; P(D, E, F, G, H, A, B, C, w11, 0xD6990624);
  157. w12=S1(w10)+w5+S0(w14)+w13; P(C, D, E, F, G, H, A, B, w12, 0xF40E3585);
  158. w13=S1(w11)+w6+S0(SIZE)+w14; P(B, C, D, E, F, G, H, A, w13, 0x106AA070);
  159. w14=S1(w12)+w7+S0(w16)+SIZE; P(A, B, C, D, E, F, G, H, w14, 0x19A4C116);
  160. SIZE=S1(w13)+w8+S0(w0)+w16; P(H, A, B, C, D, E, F, G, SIZE, 0x1E376C08);
  161. w16=S1(w14)+w9+S0(w1)+w0; P(G, H, A, B, C, D, E, F, w16, 0x2748774C);
  162. w0=S1(SIZE)+w10+S0(w2)+w1; P(F, G, H, A, B, C, D, E, w0, 0x34B0BCB5);
  163. w1=S1(w16)+w11+S0(w3)+w2; P(E, F, G, H, A, B, C, D, w1, 0x391C0CB3);
  164. w2=S1(w0)+w12+S0(w4)+w3; P(D, E, F, G, H, A, B, C, w2, 0x4ED8AA4A);
  165. w3=S1(w1)+w13+S0(w5)+w4; P(C, D, E, F, G, H, A, B, w3, 0x5B9CCA4F);
  166. w4=S1(w2)+w14+S0(w6)+w5; P(B, C, D, E, F, G, H, A, w4, 0x682E6FF3);
  167. w5=S1(w3)+SIZE+S0(w7)+w6; P(A, B, C, D, E, F, G, H, w5, 0x748F82EE);
  168. w6=S1(w4)+w16+S0(w8)+w7; P(H, A, B, C, D, E, F, G, w6, 0x78A5636F);
  169. w7=S1(w5)+w0+S0(w9)+w8; P(G, H, A, B, C, D, E, F, w7, 0x84C87814);
  170. w8=S1(w6)+w1+S0(w10)+w9; P(F, G, H, A, B, C, D, E, w8, 0x8CC70208);
  171. w9=S1(w7)+w2+S0(w11)+w10; P(E, F, G, H, A, B, C, D, w9, 0x90BEFFFA);
  172. w10=S1(w8)+w3+S0(w12)+w11; P(D, E, F, G, H, A, B, C, w10, 0xA4506CEB);
  // Early reject: the two remaining rounds write only F,B and E,A, so D already
  // holds its final pre-addition value. Return when no lane's D equals singlehash.w.
  173. #ifdef SINGLE_MODE
  174. if (all((uint4)singlehash.w != D)) return;
  175. #endif
  176. w11=S1(w9)+w4+S0(w13)+w12; P(C, D, E, F, G, H, A, B, w11, 0xBEF9A3F7);
  177. w12=S1(w10)+w5+S0(w14)+w13; P(B, C, D, E, F, G, H, A, w12, 0xC67178F2);
  178.  
  179.  
  180.  
  // SINGLE_MODE: keep going only if some lane matches singlehash.x/.y/.z in A/B/C.
  // The comparison happens before H0..H7 are added and before the byte swap.
  // NOTE(review): this implies the host passes singlehash in that
  // pre-finalization form - confirm against the host code.
  181. #ifdef SINGLE_MODE
  182. id=0;
  183. if ((singlehash.x==A.s0)&&(singlehash.y==B.s0)&&(singlehash.z==C.s0)) id = 1;
  184. if ((singlehash.x==A.s1)&&(singlehash.y==B.s1)&&(singlehash.z==C.s1)) id = 1;
  185. if ((singlehash.x==A.s2)&&(singlehash.y==B.s2)&&(singlehash.z==C.s2)) id = 1;
  186. if ((singlehash.x==A.s3)&&(singlehash.y==B.s3)&&(singlehash.z==C.s3)) id = 1;
  187. if (id==0) return;
  188. #endif
  189.  
  190.  
  // Add the initial hash values to the working registers to form the digest words.
  191. A=A+(uint4)H0;
  192. B=B+(uint4)H1;
  193. C=C+(uint4)H2;
  194. D=D+(uint4)H3;
  195. E=E+(uint4)H4;
  196. F=F+(uint4)H5;
  197. G=G+(uint4)H6;
  198. H=H+(uint4)H7;
  199.  
  200.  
  201.  
  // Byte-swap every digest word.
  202. Endian_Reverse32(A);
  203. Endian_Reverse32(B);
  204. Endian_Reverse32(C);
  205. Endian_Reverse32(D);
  206. Endian_Reverse32(E);
  207. Endian_Reverse32(F);
  208. Endian_Reverse32(G);
  209. Endian_Reverse32(H);
  210.  
  211.  
  212.  
  // Multi-hash mode: the same two-stage filter is repeated for each of the four lanes.
  //   Stage 1: bits (B&31), (C&31), (D&31) of singlehash.x/.y/.z must all be set.
  //   Stage 2: four lookups in bitmaps, one per digest word A..D, each using
  //            word index (value>>13) and bit (value&31) at region offsets
  //            0, 65535*8, 16*65535 and 24*65535.
  // id becomes 1 if any lane passes both stages; otherwise the work-item returns.
  // NOTE(review): value>>13 can reach 8*65536-1 while the regions are spaced
  // 8*65535 words apart, so neighbouring regions can overlap by up to 8 words -
  // confirm this matches how the host builds bitmaps.
  213. #ifndef SINGLE_MODE
  214. id=0;
  215. b1=A.s0;b2=B.s0;b3=C.s0;b4=D.s0;
  216. b5=(singlehash.x >> (B.s0&31))&1;
  217. b6=(singlehash.y >> (C.s0&31))&1;
  218. b7=(singlehash.z >> (D.s0&31))&1;
  219. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && (
  220. (bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  221. b1=A.s1;b2=B.s1;b3=C.s1;b4=D.s1;
  222. b5=(singlehash.x >> (B.s1&31))&1;
  223. b6=(singlehash.y >> (C.s1&31))&1;
  224. b7=(singlehash.z >> (D.s1&31))&1;
  225. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && (
  226. (bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  227. b1=A.s2;b2=B.s2;b3=C.s2;b4=D.s2;
  228. b5=(singlehash.x >> (B.s2&31))&1;
  229. b6=(singlehash.y >> (C.s2&31))&1;
  230. b7=(singlehash.z >> (D.s2&31))&1;
  231. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && ((bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  232. b1=A.s3;b2=B.s3;b3=C.s3;b4=D.s3;
  233. b5=(singlehash.x >> (B.s3&31))&1;
  234. b6=(singlehash.y >> (C.s3&31))&1;
  235. b7=(singlehash.z >> (D.s3&31))&1;
  236. if ((b7) && (b5) && (b6)) if (((bitmaps[b1>>13]>>(b1&31))&1) && ((bitmaps[65535*8+(b2>>13)]>>(b2&31))&1) && ((bitmaps[(16*65535)+(b3>>13)]>>(b3&31))&1) && ((bitmaps[(24*65535)+(b4>>13)]>>(b4&31))&1)) id=1;
  237. if (id==0) return;
  238. #endif
  239.  
  240.  
  241.  
  // Both preprocessor branches above return when id is 0, so id is always 1
  // here. Raise the global hit flag and mark this work-item's slot.
  242. if (id==1)
  243. {
  244. found[0] = 1;
  245. found_ind[get_global_id(0)] = 1;
  246. }
  247.  
  // Store all four digests: lane k occupies dst[g*8 + 2k] (words A..D) and
  // dst[g*8 + 2k + 1] (words E..H), where g is the global id.
  248. dst[(get_global_id(0)*8)] = (uint4)(A.s0,B.s0,C.s0,D.s0);
  249. dst[(get_global_id(0)*8)+1] = (uint4)(E.s0,F.s0,G.s0,H.s0);
  250. dst[(get_global_id(0)*8)+2] = (uint4)(A.s1,B.s1,C.s1,D.s1);
  251. dst[(get_global_id(0)*8)+3] = (uint4)(E.s1,F.s1,G.s1,H.s1);
  252. dst[(get_global_id(0)*8)+4] = (uint4)(A.s2,B.s2,C.s2,D.s2);
  253. dst[(get_global_id(0)*8)+5] = (uint4)(E.s2,F.s2,G.s2,H.s2);
  254. dst[(get_global_id(0)*8)+6] = (uint4)(A.s3,B.s3,C.s3,D.s3);
  255. dst[(get_global_id(0)*8)+7] = (uint4)(E.s3,F.s3,G.s3,H.s3);
  256.  
  257.  
  258. }
  259.  
  260.  
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×