SHARE
TWEET

Untitled

a guest Jan 12th, 2018 81 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. int AsyncDataTransfer::executeAsyncKernel(void)
  2. {
  3.     int status = SDK_SUCCESS;
  4.     std::vector<cl::Event> eventList;
  5.     eventList.reserve(2);
  6.     cl::Event *writeEvents = new cl::Event[iterations * numKernels];
  7.     cl::Event *kernelEvents = new cl::Event[iterations * numKernels];
  8.     cl::Event *readEvents = new cl::Event[iterations * numKernels];
  9.     writeQueue.finish();
  10.     kernelQueue.finish();
  11.     readQueue.finish();
  12.  
  13.     sampleTimer->resetTimer(cpuTimer);
  14.     sampleTimer->startTimer(cpuTimer);
  15.     for(int i = 0; i < iterations; i++)
  16.     {
  17.         for(int k = 0; k < numKernels; ++k)
  18.         {
  19.             // Write
  20.             eventList.clear();
  21.             if(i > 0)
  22.             {
  23.                 eventList.push_back(kernelEvents[((i-1) * numKernels) + k]);
  24.             }
  25.             status |= writeQueue.enqueueWriteBuffer(inputBuffer[k], CL_FALSE, 0, bufferSize,
  26.                                                     input[k],
  27.                                                     &eventList, &(writeEvents[(i * numKernels) + k]));
  28.  
  29.             // Execute Kernel
  30.             eventList.clear();
  31.             if(i > 0)
  32.             {
  33.                 eventList.push_back(readEvents[((i-1) * numKernels) + k]);
  34.             }
  35.             eventList.push_back(writeEvents[(i * numKernels) + k]);
  36.             status |= kernel.setArg(0, inputBuffer[k]);
  37.             status |= kernel.setArg(1, outputBuffer[k]);
  38.             status |= kernel.setArg(2, k);
  39.             status |= kernelQueue.enqueueNDRangeKernel(kernel, cl::NullRange, globalThreads,
  40.                       localThreads,
  41.                       &eventList, &kernelEvents[(i * numKernels) + k]);
  42.  
  43.             // Read
  44.             eventList.clear();
  45.             eventList.push_back(kernelEvents[(i * numKernels) + k]);
  46.             status |= readQueue.enqueueReadBuffer(outputBuffer[k], CL_FALSE, 0, bufferSize,
  47.                                                   output[k],
  48.                                                   &eventList, &(readEvents[(i * numKernels) + k]));
  49.         }
  50.     }
  51.     // Flush all the queues
  52.     status |= writeQueue.flush();
  53.     status |= kernelQueue.flush();
  54.     status |= readQueue.flush();
  55.  
  56.     // Wait for finish all the operations
  57.     status |= writeQueue.finish();
  58.     status |= kernelQueue.finish();
  59.     status |= readQueue.finish();
  60.  
  61.     sampleTimer-> stopTimer(cpuTimer);
  62.     asyncTime = (sampleTimer-> readTimer(cpuTimer) * 1000) /
  63.                 (numKernels * iterations);
  64.  
  65.     // Free events
  66.     delete [] writeEvents;
  67.     delete [] kernelEvents;
  68.     delete [] readEvents;
  69.  
  70.     return status;
  71. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top