Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Index: kmeansGPU.cu
- ===================================================================
- --- kmeansGPU.cu (revision 172)
- +++ kmeansGPU.cu (working copy)
- @@ -95,23 +95,23 @@
- unsigned int tid = threadIdx.x; // thread ID in block
- // for each element
- - if (t < N)
- + FLOAT_TYPE minDist = 0.0;
- + int minIndex = 0;
- + // for each centroid
- + for (unsigned int k = 0; k < K; k++)
- {
- - FLOAT_TYPE minDist = 0.0;
- - int minIndex = 0;
- - // for each centroid
- - for (unsigned int k = 0; k < K; k++)
- + // compute distance
- + FLOAT_TYPE dist = 0.0;
- + unsigned int offsetD = 0;
- + // loop over all dimensions in segments of size tpb
- + while (offsetD < D)
- {
- - // compute distance
- - FLOAT_TYPE dist = 0.0;
- - unsigned int offsetD = 0;
- - // loop over all dimensions in segments of size tpb
- - while (offsetD < D)
- + // read up to tpb dimensions of centroid k (coalesced)
- + if (offsetD + tid < D) s_center[tid] = CTR[k * D + offsetD + tid];
- + __syncthreads();
- + // for each of the following tpb (or D - offsetD) dimensions
- + if (t < N)
- {
- - // read up to tpb dimensions of centroid K (coalesced)
- - if (offsetD + tid < D) s_center[tid] = CTR[k * D + offsetD + tid];
- - __syncthreads();
- - // for each of the following tpb (or D - offsetD) dimensions
- for (unsigned int d = offsetD; d < min(offsetD + blockDim.x, D); d++)
- {
- // broadcast centroid position and compute distance to data
- @@ -118,9 +118,12 @@
- // point along dimension; reading of X is coalesced
- dist += distanceComponentGPU(s_center + (d - offsetD), X + (d * N + t));
- }
- - offsetD += blockDim.x;
- - __syncthreads();
- }
- + offsetD += blockDim.x;
- + __syncthreads();
- + }
- + if (t < N)
- + {
- dist = distanceFinalizeGPU<FLOAT_TYPE>(1, &dist);
- // if distance to centroid smaller than previous best, reassign
- if (dist < minDist || k == 0)
- @@ -129,6 +132,9 @@
- minIndex = k;
- }
- }
- + }
- + if (t < N)
- + {
- // now write index of closest centroid to global mem (coalesced)
- ASSIGN[t] = minIndex;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement