Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// CUDA kernel
//
// This is an answer to a StackOverflow question: "Search an ordered array in a CUDA kernel"
// -- "I have an ordered array a of n unsigned integers (the first one is always 0) stored in shared memory
//     each thread has to find the array index i such that a[i] <= threadIdx.x and a[i + 1] > threadIdx.x."
//
// Approach: build a per-block histogram of the values in a[] (aux[v] = number
// of elements equal to v), then run an inclusive prefix sum over it. After the
// scan, aux[t] is the number of elements of a[] that are <= t, so the index
// wanted by thread t is aux[t] - 1 (the last such index when a[] is ordered).
//
// Launch requirements:
//   - threads are identified by threadIdx.x only (1D block)
//   - blockDim.x must not exceed MAX_THREADS_PER_BLOCK (defined elsewhere;
//     annotated as 1024 below), since aux[] has that many entries
//   - static shared memory: MAX_THREADS_PER_BLOCK ints per block
//
// Parameters:
//   a  array of n values, expected to be ordered and non-negative
//   n  number of elements in a
//
// If no element of a[] is <= threadIdx.x (e.g. a[0] is not 0), the computed
// index is -1.
__global__ void lpw_indexed_search( int *a, int n )
{
    const int idx      = threadIdx.x;
    const int nthreads = blockDim.x;

    __shared__ int aux[ MAX_THREADS_PER_BLOCK /*1024*/ ];

    // Clear this thread's histogram bin.
    aux[ idx ] = 0;
    // Every bin must be zeroed before ANY thread increments it: the bin a
    // thread increments below generally belongs to a different thread.
    __syncthreads();

    // Histogram. The block-stride loop also covers n > blockDim.x; for
    // n <= blockDim.x each thread handles at most one element.
    for( int k = idx; k < n; k += nthreads )
    {
        const int v = a[ k ];
        // Values outside [0, blockDim.x) can never satisfy v <= threadIdx.x
        // for any thread of this block, and would index outside the bins in
        // use, so they are skipped.
        if( v >= 0 && v < nthreads )
            atomicAdd( &aux[ v ], 1 ); // atomics in case there are duplicates
    }
    __syncthreads();

    // Inclusive scan over aux[0 .. blockDim.x): each step adds the value j
    // slots to the left, with j doubling. The loop bound is uniform across
    // the block, so all threads reach every barrier.
    for( int j = 1; j < nthreads; j <<= 1 )
    {
        int tmp = 0;
        if( idx >= j ) tmp = aux[ idx - j ];
        __syncthreads();   // all reads of this step finish before any write
        if( idx >= j ) aux[ idx ] += tmp;
        __syncthreads();   // all writes finish before the next step's reads
    }

    // result in "i"
    int i = aux[ idx ] - 1;
    // use "i" here...
    // ...
}
Add Comment
Please, Sign In to add comment