Advertisement
Guest User

dimensions.patch

a guest
May 10th, 2015
200
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 2.55 KB | None | 0 0
  1. Index: kmeansGPU.cu
  2. ===================================================================
  3. --- kmeansGPU.cu        (revision 172)
  4. +++ kmeansGPU.cu        (working copy)
  5. @@ -95,23 +95,23 @@
  6.      unsigned int tid = threadIdx.x;                         // thread ID in block
  7.      
  8.      // for each element
  9. -    if (t < N)
  10. +    FLOAT_TYPE minDist  = 0.0;
  11. +    int   minIndex = 0;
  12. +    // for each centroid
  13. +    for (unsigned int k = 0; k < K; k++)
  14.      {
  15. -        FLOAT_TYPE minDist  = 0.0;
  16. -        int   minIndex = 0;
  17. -        // for each centroid
  18. -        for (unsigned int k = 0; k < K; k++)
  19. +        // compute distance
  20. +        FLOAT_TYPE dist = 0.0;
  21. +        unsigned int offsetD = 0;
  22. +        // loop over all dimensions in segments of size tpb
  23. +        while (offsetD < D)
  24.          {
  25. -            // compute distance
  26. -            FLOAT_TYPE dist = 0.0;
  27. -            unsigned int offsetD = 0;
  28. -            // loop over all dimensions in segments of size tpb
  29. -            while (offsetD < D)
  30. +            // read up to tpb dimensions of centroid K (coalesced)
  31. +            if (offsetD + tid < D) s_center[tid] = CTR[k * D + offsetD + tid];
  32. +            __syncthreads();
  33. +            // for each of the following tpb (or D - offsetD) dimensions
  34. +            if (t < N)
  35.              {
  36. -                // read up to tpb dimensions of centroid K (coalesced)
  37. -                if (offsetD + tid < D) s_center[tid] = CTR[k * D + offsetD + tid];
  38. -                __syncthreads();
  39. -                // for each of the following tpb (or D - offsetD) dimensions
  40.                  for (unsigned int d = offsetD; d < min(offsetD + blockDim.x, D); d++)
  41.                  {
  42.                      // broadcast centroid position and compute distance to data
  43. @@ -118,9 +118,12 @@
  44.                      // point along dimension; reading of X is coalesced
  45.                      dist += distanceComponentGPU(s_center + (d - offsetD), X + (d * N + t));
  46.                  }
  47. -                offsetD += blockDim.x;
  48. -                __syncthreads();
  49.              }
  50. +            offsetD += blockDim.x;
  51. +            __syncthreads();
  52. +        }
  53. +        if (t < N)
  54. +        {
  55.              dist = distanceFinalizeGPU<FLOAT_TYPE>(1, &dist);
  56.              // if distance to centroid smaller than previous best, reassign
  57.              if (dist < minDist || k == 0)
  58. @@ -129,6 +132,9 @@
  59.                  minIndex = k;
  60.              }
  61.          }
  62. +    }
  63. +    if (t < N)
  64. +    {
  65.          // now write index of closest centroid to global mem (coalesced)
  66.          ASSIGN[t] = minIndex;
  67.      }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement