Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Capacity (in elements) of the device-side result buffer below.
// MAX_SIZE_OF_IMAGE is not defined in this snippet; it must be a
// compile-time constant because it sizes a static array.
const int MAX_SIZE = MAX_SIZE_OF_IMAGE;

// Threads per block used when launching findvalindex.
const int nTPB = 256;

// Device-global counter: the next free slot in dev_indices. Matching threads
// bump it with atomicAdd, so after the kernel it holds the number of matches.
__device__ int base = 0;

// Device-global output buffer that receives the index of every matching
// element; slots [0, base) are the ones that have been claimed.
__device__ int dev_indices[MAX_SIZE];
// Records the index of every element of `imagedata` that compares equal to
// `val`.
//
// Parameters:
//   imagedata - device pointer to the elements to scan (read-only).
//   val       - value to search for.
//   imagelen  - number of elements in imagedata.
//
// Launch layout: each thread inspects at most one element, at position
// threadIdx.x + blockDim.x * blockIdx.x, so a 1-D launch with at least
// `imagelen` threads is needed to cover the whole input.
//
// Output: a matching thread reserves a slot with atomicAdd on the
// device-global counter `base` and writes its element index to
// dev_indices[slot]. Slots are handed out in whatever order the atomics are
// serviced, so the stored indices are not sorted.
//
// Preconditions / notes:
//   - `base` is only ever incremented here; it must be reset to 0 before
//     each launch, otherwise new results are appended after the old ones.
//   - Writes are limited to the MAX_SIZE capacity of dev_indices. `base`
//     still counts every match, so base > MAX_SIZE after the kernel tells
//     the host that results were truncated.
template <typename T>
__global__ void findvalindex(const T* __restrict__ imagedata, const T val, const int imagelen) {
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    // Threads past the end of the input have nothing to do.
    if (idx >= imagelen) {
        return;
    }

    // Keep the original `==` comparison so any T that only defines
    // operator== behaves exactly as before.
    if (!(imagedata[idx] == val)) {
        return;
    }

    // Reserve a unique output slot for this match.
    const int slot = atomicAdd(&base, 1);

    // Never write past the end of the fixed-size buffer (stale counter or
    // imagelen larger than MAX_SIZE would otherwise corrupt device memory).
    if (slot < MAX_SIZE) {
        dev_indices[slot] = idx;
    }
}
// Host-side driver: finds every element equal to 255 in the device buffer
// `image` and copies the matching indices back into a freshly allocated
// host array.
//
// assume image data is already copied to the device using device pointer image
//
// Results:
//   findStatus  - cudaSuccess if every step succeeded, otherwise the first
//                 error encountered (later steps are skipped).
//   num_indices - number of matches reported by the kernel (0 on failure).
//   indices     - host array of num_indices entries, owned by the caller
//                 (release with delete[]).
//
// NOTE(review): findvalindex deduces T from both `image` and the literal 255
// (an int). If `image` is not an int pointer the deduction conflicts and the
// call will not compile without an explicit cast - confirm the type of image.

// Reset the device-side counter so a repeated run starts filling at slot 0
// instead of appending after stale results.
const int zero = 0;
cudaError_t findStatus = cudaMemcpyToSymbol(base, &zero, sizeof(int));

if (findStatus == cudaSuccess) {
    // One thread per element; ceil-divide so a partial last block is included.
    findvalindex<<<(MAX_SIZE + nTPB - 1) / nTPB, nTPB>>>(image, 255, MAX_SIZE);
    // Kernel launches do not return a status; pick up launch-config errors here.
    findStatus = cudaGetLastError();
}

int num_indices = 0;
if (findStatus == cudaSuccess) {
    // This copy follows the kernel on the default stream, so it also
    // reports any error raised while the kernel was running.
    findStatus = cudaMemcpyFromSymbol(&num_indices, base, sizeof(int));
}

int *indices = new int[num_indices];
if (findStatus == cudaSuccess && num_indices > 0) {
    findStatus = cudaMemcpyFromSymbol(indices, dev_indices, sizeof(int) * num_indices);
}
// When findStatus == cudaSuccess, indices now contains the list of matching
// indices, in the order the kernel's threads claimed their slots (unsorted).
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement