Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Adds, for every (i, j) pair handled by a thread, the term
 *
 *     (A[j] + A[i - j]) * (B[j] + B[i - j]) - (D[j] + D[i - j])
 *
 * to result[i].
 *
 * Launch layout: 2D. The x dimension of the grid enumerates the output
 * index i, the y dimension enumerates the inner index j. The grid may
 * overshoot both ranges; threads that fall outside simply do nothing.
 *
 * Index ranges that contribute:
 *   - i in (0, resultSize - 1), both ends exclusive
 *   - j in [start, end], where start = (i >= size) ? (i % size) + 1 : 0
 *     and end = (i + 1) / 2
 *
 * Parameters:
 *   A, B, D     input arrays, read at indices j and i - j
 *   result      output array; contributions are accumulated into the
 *               values it already holds, so it must be initialized
 *               before the launch
 *   size        used to derive the lower bound of j (see above)
 *   resultSize  number of elements in result
 *
 * NOTE(review): TYPE is defined outside this block. It has to be an
 * integral type (it is used as the right operand of %), and it must be one
 * of the types atomicAdd is overloaded for (e.g. int, unsigned int,
 * unsigned long long) - confirm against the TYPE definition.
 */
__global__ void kernel_res_nested(TYPE *A, TYPE *B, TYPE *D, TYPE *result, TYPE size, TYPE resultSize){
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int j = blockDim.y * blockIdx.y + threadIdx.y;

    // Leave before touching result[i]: threads in the grid tail have
    // i >= resultSize and must neither read nor write past the array.
    if (!(i > 0 && i < resultSize - 1)) {
        return;
    }

    TYPE start = (i >= size) ? (i % size) + 1 : 0;
    TYPE end = (i + 1) / 2;

    if (j >= start && j <= end) {
        TYPE term = (A[j] + A[i - j]) * (B[j] + B[i - j]) - (D[j] + D[i - j]);

        // Several threads (same i, different j) accumulate into the same
        // element. A plain load / add / store would lose updates, so the
        // addition has to be atomic.
        atomicAdd(&result[i], term);
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement