Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdint.h>
- #include <memory.h>
- extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
- typedef unsigned char BitSequence;
- #include "cuda_helper.h"
- #define SPH_C32(x) (x)
- #define SPH_T32(x) (x)
- static __device__ __forceinline__ uint32_t ROTL32( const uint32_t in, const int offset )
- {
- uint32_t ret;
- #if __CUDA_ARCH__ >= 320
- asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(in), "r"(in), "r"(offset));
- #else
- ret = (in << offset) | (in >> (32-offset));
- #endif
- return ret;
- }
- static __constant__ uint32_t d_alpha_n[32];
- static __constant__ uint32_t d_alpha_f[32];
- static __constant__ uint32_t d_T512[64][16];
- static const uint32_t alpha_n[] = {
- SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
- SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
- SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
- SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
- SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
- SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
- SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
- SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
- SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
- SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
- SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
- };
- static const uint32_t alpha_f[] = {
- SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
- SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
- SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
- SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
- SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
- SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
- SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
- SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
- SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
- SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
- SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
- };
- #define hamsi_s00 m0
- #define hamsi_s01 m1
- #define hamsi_s02 c0
- #define hamsi_s03 c1
- #define hamsi_s04 m2
- #define hamsi_s05 m3
- #define hamsi_s06 c2
- #define hamsi_s07 c3
- #define hamsi_s08 c4
- #define hamsi_s09 c5
- #define hamsi_s0A m4
- #define hamsi_s0B m5
- #define hamsi_s0C c6
- #define hamsi_s0D c7
- #define hamsi_s0E m6
- #define hamsi_s0F m7
- #define hamsi_s10 m8
- #define hamsi_s11 m9
- #define hamsi_s12 c8
- #define hamsi_s13 c9
- #define hamsi_s14 mA
- #define hamsi_s15 mB
- #define hamsi_s16 cA
- #define hamsi_s17 cB
- #define hamsi_s18 cC
- #define hamsi_s19 cD
- #define hamsi_s1A mC
- #define hamsi_s1B mD
- #define hamsi_s1C cE
- #define hamsi_s1D cF
- #define hamsi_s1E mE
- #define hamsi_s1F mF
- #define SBOX(a, b, c, d) { \
- uint32_t t; \
- t = (a); \
- (a) &= (c); \
- (a) ^= (d); \
- (c) ^= (b); \
- (c) ^= (a); \
- (d) |= t; \
- (d) ^= (b); \
- t ^= (c); \
- (b) = (d); \
- (d) |= t; \
- (d) ^= (a); \
- (a) &= (b); \
- t ^= (a); \
- (b) ^= (d); \
- (b) ^= t; \
- (a) = (c); \
- (c) = (b); \
- (b) = (d); \
- (d) = SPH_T32(~t); \
- }
- #define HAMSI_L(a, b, c, d) { \
- (a) = ROTL32(a, 13); \
- (c) = ROTL32(c, 3); \
- (b) ^= (a) ^ (c); \
- (d) ^= (c) ^ ((a) << 3); \
- (b) = ROTL32(b, 1); \
- (d) = ROTL32(d, 7); \
- (a) ^= (b) ^ (d); \
- (c) ^= (d) ^ ((b) << 7); \
- (a) = ROTL32(a, 5); \
- (c) = ROTL32(c, 22); \
- }
- #define ROUND_BIG(rc, alpha) { \
- hamsi_s00 ^= alpha[0x00]; \
- hamsi_s08 ^= alpha[0x08]; \
- hamsi_s10 ^= alpha[0x10]; \
- hamsi_s18 ^= alpha[0x18]; \
- hamsi_s01 ^= alpha[0x01] ^ (uint32_t)(rc); \
- hamsi_s09 ^= alpha[0x09]; \
- hamsi_s11 ^= alpha[0x11]; \
- hamsi_s19 ^= alpha[0x19]; \
- hamsi_s02 ^= alpha[0x02]; \
- hamsi_s0A ^= alpha[0x0A]; \
- hamsi_s12 ^= alpha[0x12]; \
- hamsi_s1A ^= alpha[0x1A]; \
- hamsi_s03 ^= alpha[0x03]; \
- hamsi_s0B ^= alpha[0x0B]; \
- hamsi_s13 ^= alpha[0x13]; \
- hamsi_s1B ^= alpha[0x1B]; \
- hamsi_s04 ^= alpha[0x04]; \
- hamsi_s0C ^= alpha[0x0C]; \
- hamsi_s14 ^= alpha[0x14]; \
- hamsi_s1C ^= alpha[0x1C]; \
- hamsi_s05 ^= alpha[0x05]; \
- hamsi_s0D ^= alpha[0x0D]; \
- hamsi_s15 ^= alpha[0x15]; \
- hamsi_s1D ^= alpha[0x1D]; \
- hamsi_s06 ^= alpha[0x06]; \
- hamsi_s0E ^= alpha[0x0E]; \
- hamsi_s16 ^= alpha[0x16]; \
- hamsi_s1E ^= alpha[0x1E]; \
- hamsi_s07 ^= alpha[0x07]; \
- hamsi_s0F ^= alpha[0x0F]; \
- hamsi_s17 ^= alpha[0x17]; \
- hamsi_s1F ^= alpha[0x1F]; \
- SBOX(hamsi_s00, hamsi_s08, hamsi_s10, hamsi_s18); \
- SBOX(hamsi_s01, hamsi_s09, hamsi_s11, hamsi_s19); \
- SBOX(hamsi_s02, hamsi_s0A, hamsi_s12, hamsi_s1A); \
- SBOX(hamsi_s03, hamsi_s0B, hamsi_s13, hamsi_s1B); \
- SBOX(hamsi_s04, hamsi_s0C, hamsi_s14, hamsi_s1C); \
- SBOX(hamsi_s05, hamsi_s0D, hamsi_s15, hamsi_s1D); \
- SBOX(hamsi_s06, hamsi_s0E, hamsi_s16, hamsi_s1E); \
- SBOX(hamsi_s07, hamsi_s0F, hamsi_s17, hamsi_s1F); \
- HAMSI_L(hamsi_s00, hamsi_s09, hamsi_s12, hamsi_s1B); \
- HAMSI_L(hamsi_s01, hamsi_s0A, hamsi_s13, hamsi_s1C); \
- HAMSI_L(hamsi_s02, hamsi_s0B, hamsi_s14, hamsi_s1D); \
- HAMSI_L(hamsi_s03, hamsi_s0C, hamsi_s15, hamsi_s1E); \
- HAMSI_L(hamsi_s04, hamsi_s0D, hamsi_s16, hamsi_s1F); \
- HAMSI_L(hamsi_s05, hamsi_s0E, hamsi_s17, hamsi_s18); \
- HAMSI_L(hamsi_s06, hamsi_s0F, hamsi_s10, hamsi_s19); \
- HAMSI_L(hamsi_s07, hamsi_s08, hamsi_s11, hamsi_s1A); \
- HAMSI_L(hamsi_s00, hamsi_s02, hamsi_s05, hamsi_s07); \
- HAMSI_L(hamsi_s10, hamsi_s13, hamsi_s15, hamsi_s16); \
- HAMSI_L(hamsi_s09, hamsi_s0B, hamsi_s0C, hamsi_s0E); \
- HAMSI_L(hamsi_s19, hamsi_s1A, hamsi_s1C, hamsi_s1F); \
- }
- #define P_BIG { \
- for( int r = 0; r < 6; r++ ) \
- ROUND_BIG(r, d_alpha_n); \
- }
- #define PF_BIG { \
- for( int r = 0; r < 12; r++ ) \
- ROUND_BIG(r, d_alpha_f); \
- }
- #define T_BIG { \
- /* order is important */ \
- cF = (h[0xF] ^= hamsi_s17); \
- cE = (h[0xE] ^= hamsi_s16); \
- cD = (h[0xD] ^= hamsi_s15); \
- cC = (h[0xC] ^= hamsi_s14); \
- cB = (h[0xB] ^= hamsi_s13); \
- cA = (h[0xA] ^= hamsi_s12); \
- c9 = (h[0x9] ^= hamsi_s11); \
- c8 = (h[0x8] ^= hamsi_s10); \
- c7 = (h[0x7] ^= hamsi_s07); \
- c6 = (h[0x6] ^= hamsi_s06); \
- c5 = (h[0x5] ^= hamsi_s05); \
- c4 = (h[0x4] ^= hamsi_s04); \
- c3 = (h[0x3] ^= hamsi_s03); \
- c2 = (h[0x2] ^= hamsi_s02); \
- c1 = (h[0x1] ^= hamsi_s01); \
- c0 = (h[0x0] ^= hamsi_s00); \
- }#pragma unroll 2
- for (int u = 0; u < 8; u ++) {
- db = h1[i+u];
- #pragma unroll 2
- for (int v = 0; v < 8; v ++, db >>= 1) {
- dm = -(uint32_t)(db & 1);
- m0 ^= dm & *(tp+ 0); m1 ^= dm & *(tp+ 1);
- m2 ^= dm & *(tp+ 2); m3 ^= dm & *(tp+ 3);
- m4 ^= dm & *(tp+ 4); m5 ^= dm & *(tp+ 5);
- m6 ^= dm & *(tp+ 6); m7 ^= dm & *(tp+ 7);
- m8 ^= dm & *(tp+ 8); m9 ^= dm & *(tp+ 9);
- mA ^= dm & *(tp+10); mB ^= dm & *(tp+11);
- mC ^= dm & *(tp+12); mD ^= dm & *(tp+13);
- mE ^= dm & *(tp+14); mF ^= dm & *(tp+15);
- tp += 16;
- }
- }
- for( int r = 0; r < 6; r += 2 ) {
- ROUND_BIG(r, d_alpha_n);
- ROUND_BIG(r+1, d_alpha_n);
- }
- T_BIG;
- }
- tp = &d_T512[0][0] + 112;
- m0 = *(tp+ 0); m1 = *(tp+ 1);
- m2 = *(tp+ 2); m3 = *(tp+ 3);
- m4 = *(tp+ 4); m5 = *(tp+ 5);
- m6 = *(tp+ 6); m7 = *(tp+ 7);
- m8 = *(tp+ 8); m9 = *(tp+ 9);
- mA = *(tp+10); mB = *(tp+11);
- mC = *(tp+12); mD = *(tp+13);
- mE = *(tp+14); mF = *(tp+15);
- for( int r = 0; r < 6; r += 2 ) {
- ROUND_BIG(r, d_alpha_n);
- ROUND_BIG(r+1, d_alpha_n);
- }
- T_BIG;
- tp = &d_T512[0][0] + 784;
- m0 = *(tp+ 0); m1 = *(tp+ 1);
- m2 = *(tp+ 2); m3 = *(tp+ 3);
- m4 = *(tp+ 4); m5 = *(tp+ 5);
- m6 = *(tp+ 6); m7 = *(tp+ 7);
- m8 = *(tp+ 8); m9 = *(tp+ 9);
- mA = *(tp+10); mB = *(tp+11);
- mC = *(tp+12); mD = *(tp+13);
- mE = *(tp+14); mF = *(tp+15);
- for( int r = 0; r < 12; r += 2 ) {
- ROUND_BIG(r, d_alpha_f);
- ROUND_BIG(r+1, d_alpha_f);
- }
- T_BIG;
- #pragma unroll 16
- for (int i = 0; i < 16; i++)
- Hash[i] = cuda_swab32(h[i]);
- }
- }
- __host__ void x13_hamsi512_cpu_init(int thr_id, int threads)
- {
- cudaMemcpyToSymbol( d_alpha_n, alpha_n, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
- cudaMemcpyToSymbol( d_alpha_f, alpha_f, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice);
- cudaMemcpyToSymbol( d_T512, T512, sizeof(uint32_t)*64*16, 0, cudaMemcpyHostToDevice);
- }
- __host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
- {
- const int threadsperblock = 256;
- dim3 grid((threads + threadsperblock-1)/threadsperblock);
- dim3 block(threadsperblock);
- x13_hamsi512_gpu_hash_64<<<grid, block>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
- MyStreamSynchronize(NULL, order, thr_id);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement