Advertisement
Guest User

Untitled

a guest
Jun 29th, 2012
32
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.79 KB | None | 0 0
  1.  
  2.   bool sort(Context * c,int targetDevice,int n,cl_mem in,cl_mem out) const
  3.   {
  4.     c->enqueueCopy(targetDevice,in,out,0,0,n*sizeof(data_t),EventVector());
  5.     c->enqueueBarrier(targetDevice); // sync
  6.  
  7.     cl_mem buffers[2];
  8.     buffers[0] = in;
  9.     buffers[1] = out;
  10.     for (int length=1;length<n;length<<=1)
  11.     {
  12.       int inc = length;
  13.       std::list<int> strategy; // vector defining the sequence of reductions
  14.       {
  15.         int ii = inc;
  16.         while (ii>0)
  17.         {
  18.           if (ii==128 || ii==32 || ii==8) { strategy.push_back(-1); break; } // C kernel
  19.           int d = 1; // default is 1 bit
  20.           if (0) d = 1;
  21. #if 1
  22.           // Force jump to 128
  23.           else if (ii==256) d = 1;
  24.           else if (ii==512 && (ALLOWB & 4)) d = 2;
  25.           else if (ii==1024 && (ALLOWB & 8)) d = 3;
  26.           else if (ii==2048 && (ALLOWB & 16)) d = 4;
  27. #endif
  28.           else if (ii>=8 && (ALLOWB & 16)) d = 4;
  29.           else if (ii>=4 && (ALLOWB & 8)) d = 3;
  30.           else if (ii>=2 && (ALLOWB & 4)) d = 2;
  31.           else d = 1;
  32.           strategy.push_back(d);
  33.           ii >>= d;
  34.         }
  35.       }
  36.  
  37.       while (inc > 0)
  38.       {
  39.         int ninc = 0;
  40.         int kid = -1;
  41.         int doLocal = 0;
  42.         int nThreads = 0;
  43.         int d = strategy.front(); strategy.pop_front();
  44.  
  45.         switch (d)
  46.         {
  47.         case -1:
  48.           kid = PARALLEL_BITONIC_C4_KERNEL;
  49.           ninc = -1; // reduce all bits
  50.           doLocal = 4;
  51.           nThreads = n >> 2;
  52.           break;
  53.         case 4:
  54.           kid = PARALLEL_BITONIC_B16_KERNEL;
  55.           ninc = 4;
  56.           nThreads = n >> ninc;
  57.           break;
  58.         case 3:
  59.           kid = PARALLEL_BITONIC_B8_KERNEL;
  60.           ninc = 3;
  61.           nThreads = n >> ninc;
  62.           break;
  63.         case 2:
  64.           kid = PARALLEL_BITONIC_B4_KERNEL;
  65.           ninc = 2;
  66.           nThreads = n >> ninc;
  67.           break;
  68.         case 1:
  69.           kid = PARALLEL_BITONIC_B2_KERNEL;
  70.           ninc = 1;
  71.           nThreads = n >> ninc;
  72.           break;
  73.         default:
  74.           printf("Strategy error!\n");
  75.           break;
  76.         }
  77.         int wg = c->getMaxWorkgroupSize(targetDevice,kid);
  78.         wg = std::min(wg,256);
  79.         wg = std::min(wg,nThreads);
  80.         c->clearArgs(kid);
  81.         c->pushArg(kid,out);
  82.         c->pushArg(kid,inc); // INC passed to kernel
  83.         c->pushArg(kid,length<<1); // DIR passed to kernel
  84.         if (doLocal>0) c->pushLocalArg(kid,doLocal*wg*sizeof(data_t)); // DOLOCAL values / thread
  85.         c->enqueueKernel(targetDevice,kid,nThreads,1,wg,1,EventVector());
  86.         c->enqueueBarrier(targetDevice); // sync
  87.         // if (mLastN != n) printf("LENGTH=%d INC=%d KID=%d\n",length,inc,kid); // DEBUG
  88.         if (ninc < 0) break; // done
  89.         inc >>= ninc;
  90.       }
  91.     }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement