Advertisement
Guest User

Untitled

a guest
Nov 15th, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.24 KB | None | 0 0
  1. int boundingBoxWidth = maxx - minx;
  2.     int boundingBoxHeight = maxy - miny;
  3.     int numPixels = boundingBoxWidth * boundingBoxHeight;
  4.     int parallelThreshold = 32;
  5.     bool overThreshold = numPixels > parallelThreshold; // true if over threshold
  6.     int votes = __ballot_sync(0xFFFFFFFF, overThreshold);
  7.     int count = __popc(votes);
  8.  
  9.  
  10.     for (int bigTriangle = 0; bigTriangle < count; bigTriangle++){ // 1.
  11.          int position = __ffs(votes);
  12.          //votes &= ~(1UL << position);  // Set bit at position in votes to 0
  13.          votes ^= 1UL << position;
  14.          unsigned int triangleIndexP = __shfl_sync(0xFFFFFFFF, triangleIndex, position);
  15.          int minxP = __shfl_sync(0xFFFFFFFF, minx, position);
  16.          int maxxP = __shfl_sync(0xFFFFFFFF, maxx, position);
  17.          int minyP = __shfl_sync(0xFFFFFFFF, miny, position);
  18.          int maxyP = __shfl_sync(0xFFFFFFFF, maxy, position);
  19.          int numPixelsP = __shfl_sync(0xFFFFFFFF, numPixels, position);
  20.          float4 v0P = make_float4(0,0,0,0);
  21.          float4 v1P = make_float4(0,0,0,0);
  22.          float4 v2P = make_float4(0,0,0,0);
  23.          v0P.x = __shfl_sync(0xFFFFFFFF, v0.x, position);
  24.          v0P.y = __shfl_sync(0xFFFFFFFF, v0.y, position);
  25.          v0P.z = __shfl_sync(0xFFFFFFFF, v0.z, position);
  26.          v0P.w = __shfl_sync(0xFFFFFFFF, v0.w, position);
  27.          v1P.x = __shfl_sync(0xFFFFFFFF, v1.x, position);
  28.          v1P.y = __shfl_sync(0xFFFFFFFF, v1.y, position);
  29.          v1P.z = __shfl_sync(0xFFFFFFFF, v1.z, position);
  30.          v1P.w = __shfl_sync(0xFFFFFFFF, v1.w, position);
  31.          v2P.x = __shfl_sync(0xFFFFFFFF, v2.x, position);
  32.          v2P.y = __shfl_sync(0xFFFFFFFF, v2.y, position);
  33.          v2P.z = __shfl_sync(0xFFFFFFFF, v2.z, position);
  34.          v2P.w = __shfl_sync(0xFFFFFFFF, v2.w, position);
  35.          for (int id = 0; id < numPixelsP; id += 32 ){
  36.              // int currentIndex = id + (int(fmod(float(threadIdx.x), float(32))));
  37.              int threadId = threadIdx.x + threadIdx.y * blockDim.x;
  38.              int currentIndex = id + (threadId % 32);
  39.              if (currentIndex > numPixelsP) {break;}
  40.              //int xP = minxP + int(fmod(float(currentIndex), float(maxxP - minxP)));//(currentIndex % (maxxP - minxP));
  41.              int xP = minxP + (currentIndex % (maxxP - minxP));
  42.              int yP = minyP + int(currentIndex / (maxxP - minxP));
  43.              rasteriseSinglePixel(v0P, v1P, v2P, xP, yP, mesh, triangleIndexP, frameBuffer, depthBuffer, width, height);
  44.          }
  45.     }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement