// Pastebin.com

// The kernel (this is the entrypoint of GPU code): one thread hashes one word.
//
// Each thread copies its dictionary word from global memory into a per-thread
// 64-byte buffer, applies single-block MD5 padding, stores the padded block in
// paddedWords3, runs md5_gpu on it and writes a few leading hex digits of the
// result to hash.
//
// Parameters:
//   gwords       - input words, msg_size bytes per thread; a word shorter than
//                  msg_size ends at its first 0 byte
//   paddedWords3 - output, 64 bytes per thread: the padded block, with its
//                  last three bytes overwritten by trace values
//   hash         - output, one uint per thread (see size_hash)
//   realthreads  - number of words; threads with linidx >= realthreads return
//   msg_size     - per-word stride in gwords and maximum word length
//   size_hash    - number of leading hex digits to keep (1..4); for any other
//                  value hash is left untouched
//   launch       - launch counter, only used for the trace bytes
//
// Launch layout: the linear thread index is derived from blockIdx.x,
// blockIdx.y, gridDim.x, blockDim.x and threadIdx.x, so blocks must be 1D.
//
// Limits: only the first 55 bytes of a word are hashed, because the 0x80
// marker and the 64-bit length field must fit in the same 64-byte block.
__global__ void md5_calc(char *gwords, char *paddedWords3, uint *hash, int realthreads, int msg_size, int size_hash, uint launch){
    // Longest message that still leaves room for the 0x80 marker (1 byte)
    // and the length field (8 bytes) inside one 64-byte block.
    const int MAX_WORD_BYTES = 55;

    // assuming blockDim.y = 1 and threadIdx.y = 0, always
    const int iblock = blockIdx.x + blockIdx.y * gridDim.x;
    const int linidx = threadIdx.x + iblock * blockDim.x;

    // this check slows down the code by ~0.4% (measured)
    if(linidx >= realthreads){
        return;
    }

    // Declared as uint so the buffer handed to md5_gpu is correctly aligned
    // for 32-bit accesses; filled through a byte view.
    uint block[16] = {0};
    unsigned char *msg = (unsigned char *)block;

    // Clamp the number of bytes we are willing to copy: never negative and
    // never more than fits in a single padded block.
    int limit = msg_size;
    if(limit < 0){
        limit = 0;
    }
    if(limit > MAX_WORD_BYTES){
        limit = MAX_WORD_BYTES;
    }

    // load the dictionary word for this thread (size_t avoids int overflow
    // of the byte offset for large thread counts)
    const size_t stride = (msg_size > 0) ? (size_t)msg_size : 0;
    const char *word = gwords + (size_t)linidx * stride;

    // The length is tracked separately from any "terminator found" flag, so
    // an empty word (first byte 0) is padded as a zero-length message.
    int len = 0;
    while(len < limit && word[len] != 0){
        msg[len] = (unsigned char)word[len];
        ++len;
    }

    // MD5 padding: a single 0x80 byte right after the message, zeros, then
    // the message length in bits, little-endian, starting at byte 56.
    // len <= 55 means at most 440 bits, so two bytes are enough.
    msg[len] = 0x80;
    const uint bits = (uint)len * 8u;
    msg[56] = (unsigned char)(bits & 0xFFu);
    msg[57] = (unsigned char)((bits >> 8) & 0xFFu);

    // Publish the padded block, then overwrite its last three bytes with
    // trace values (thread index, launch number, running word number); the
    // local buffer that gets hashed is not affected by these.
    char *out = paddedWords3 + (size_t)linidx * 64;
    for(int i = 0; i < 64; i++){
        out[i] = (char)msg[i];
    }
    out[63] = (char)linidx;
    out[62] = (char)launch;
    out[61] = (char)((launch * realthreads) + linidx + 1);

    // compute MD5 hash
    // NOTE(review): md5_gpu is defined outside this view; presumably it
    // returns the four digest words through a, b, c, d - confirm.
    uint a = 0;
    uint b = 0;
    uint c = 0;
    uint d = 0;
    md5_gpu(block, a, b, c, d);

    // return the hash converted to number
    // The low byte of a is taken as the first digest byte, so its hex digits
    // come first and the second byte follows (different endian order).
    switch(size_hash){
        case 1:
            // high nibble of byte 0
            hash[linidx] = (a >> 4) & 0xFu;
            break;
        case 2:
            // byte 0
            hash[linidx] = a & 0xFFu;
            break;
        case 3:
            // byte 0 followed by the high nibble of byte 1
            hash[linidx] = ((a & 0xFFu) << 4) | ((a >> 12) & 0xFu);
            break;
        case 4:
            // byte 0 followed by byte 1
            hash[linidx] = ((a & 0xFFu) << 8) | ((a >> 8) & 0xFFu);
            break;
        default:
            // unsupported digit count: leave hash[linidx] untouched
            break;
    }
}