Advertisement
Guest User

Untitled

a guest
Mar 30th, 2017
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.80 KB | None | 0 0
  // Threads per block used by the host-side launch of findvalindex.
  const int nTPB = 256;
  // Capacity, in elements, of the device-side index buffer below.
  const int MAX_SIZE = MAX_SIZE_OF_IMAGE;
  // Device-resident output list: the kernel stores the index of every
  // matching element here, one entry per match.
  __device__ int dev_indices[MAX_SIZE];
  // Device-resident counter: the next free slot in dev_indices. The kernel
  // claims slots with atomicAdd, so after a search it holds the match count.
  __device__ int base = 0;
  /**
   * Records the position of every element of `imagedata` that equals `val`.
   *
   * Launch layout: 1D grid of 1D blocks, one thread per element. There is no
   * striding loop, so gridDim.x * blockDim.x must be >= imagelen for every
   * element to be examined. No shared memory is used.
   *
   * Each matching thread claims a slot by atomically incrementing the global
   * counter `base` and stores its element index in `dev_indices[slot]`.
   * Slots are handed out in whatever order the atomics complete, so the
   * resulting list is NOT sorted.
   *
   * Preconditions: `base` must be 0 before the launch, otherwise new results
   * are appended after stale ones.
   *
   * Capacity: `dev_indices` holds MAX_SIZE entries. Matches beyond that are
   * still counted in `base` but are not stored, so a final `base` greater
   * than MAX_SIZE tells the host that the list was truncated.
   *
   * @param imagedata device pointer to the elements to scan (read-only)
   * @param val       value to search for (compared with operator==)
   * @param imagelen  number of elements in imagedata
   */
  template <typename T>
  __global__ void findvalindex(const T* __restrict__ imagedata, const T val, const int imagelen)
  {
      const int idx = threadIdx.x + blockDim.x * blockIdx.x;

      // Threads in the tail of the last block have no element to inspect.
      if (idx >= imagelen)
          return;

      if (!(imagedata[idx] == val))
          return;

      // atomicAdd returns the previous counter value, which is this thread's
      // private slot in the output list.
      const int mylocation = atomicAdd(&base, 1);

      // imagelen is independent of MAX_SIZE; never write past the buffer.
      if (mylocation < MAX_SIZE)
          dev_indices[mylocation] = idx;
  }
  13.  
  // assume image data is already copied to the device using device pointer image
  // NOTE(review): the kernel's T is deduced from both `image` and the literal
  // 255 (an int). Unless `image` points to int, that deduction conflicts --
  // confirm the element type and cast 255 to it if needed.

  // Reset the device-side counter so a repeated search does not append to, or
  // report the count of, an earlier one.
  const int base_reset = 0;
  cudaError_t find_status = cudaMemcpyToSymbol(base, &base_reset, sizeof(int));

  if (find_status == cudaSuccess) {
      findvalindex<<<(MAX_SIZE+nTPB-1)/nTPB, nTPB>>>(image, 255, MAX_SIZE);
      // A kernel launch returns nothing; configuration errors show up here.
      find_status = cudaGetLastError();
  }

  // Stays 0 on any failure so the allocation below never sees garbage.
  int num_indices = 0;
  if (find_status == cudaSuccess) {
      // Errors raised while the kernel was running are reported by this copy.
      find_status = cudaMemcpyFromSymbol(&num_indices, base, sizeof(int));
      if (find_status != cudaSuccess)
          num_indices = 0;
  }

  // Caller owns this buffer and must release it with delete[].
  int *indices = new int[num_indices];
  if (num_indices > 0) {
      find_status = cudaMemcpyFromSymbol(indices, dev_indices, sizeof(int) * num_indices);
      if (find_status != cudaSuccess)
          num_indices = 0;  // contents of indices are not valid
  }
  // If find_status == cudaSuccess, indices now contains the list of matching
  // indices (num_indices entries, in no particular order). Otherwise
  // num_indices is 0 and find_status holds the CUDA error.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement