Guest User

Untitled

a guest
Jan 12th, 2018
118
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. int AsyncDataTransfer::executeAsyncKernel(void)
  2. {
  3. int status = SDK_SUCCESS;
  4. std::vector<cl::Event> eventList;
  5. eventList.reserve(2);
  6. cl::Event *writeEvents = new cl::Event[iterations * numKernels];
  7. cl::Event *kernelEvents = new cl::Event[iterations * numKernels];
  8. cl::Event *readEvents = new cl::Event[iterations * numKernels];
  9. writeQueue.finish();
  10. kernelQueue.finish();
  11. readQueue.finish();
  12.  
  13. sampleTimer->resetTimer(cpuTimer);
  14. sampleTimer->startTimer(cpuTimer);
  15. for(int i = 0; i < iterations; i++)
  16. {
  17. for(int k = 0; k < numKernels; ++k)
  18. {
  19. // Write
  20. eventList.clear();
  21. if(i > 0)
  22. {
  23. eventList.push_back(kernelEvents[((i-1) * numKernels) + k]);
  24. }
  25. status |= writeQueue.enqueueWriteBuffer(inputBuffer[k], CL_FALSE, 0, bufferSize,
  26. input[k],
  27. &eventList, &(writeEvents[(i * numKernels) + k]));
  28.  
  29. // Execute Kernel
  30. eventList.clear();
  31. if(i > 0)
  32. {
  33. eventList.push_back(readEvents[((i-1) * numKernels) + k]);
  34. }
  35. eventList.push_back(writeEvents[(i * numKernels) + k]);
  36. status |= kernel.setArg(0, inputBuffer[k]);
  37. status |= kernel.setArg(1, outputBuffer[k]);
  38. status |= kernel.setArg(2, k);
  39. status |= kernelQueue.enqueueNDRangeKernel(kernel, cl::NullRange, globalThreads,
  40. localThreads,
  41. &eventList, &kernelEvents[(i * numKernels) + k]);
  42.  
  43. // Read
  44. eventList.clear();
  45. eventList.push_back(kernelEvents[(i * numKernels) + k]);
  46. status |= readQueue.enqueueReadBuffer(outputBuffer[k], CL_FALSE, 0, bufferSize,
  47. output[k],
  48. &eventList, &(readEvents[(i * numKernels) + k]));
  49. }
  50. }
  51. // Flush all the queues
  52. status |= writeQueue.flush();
  53. status |= kernelQueue.flush();
  54. status |= readQueue.flush();
  55.  
  56. // Wait for finish all the operations
  57. status |= writeQueue.finish();
  58. status |= kernelQueue.finish();
  59. status |= readQueue.finish();
  60.  
  61. sampleTimer-> stopTimer(cpuTimer);
  62. asyncTime = (sampleTimer-> readTimer(cpuTimer) * 1000) /
  63. (numKernels * iterations);
  64.  
  65. // Free events
  66. delete [] writeEvents;
  67. delete [] kernelEvents;
  68. delete [] readEvents;
  69.  
  70. return status;
  71. }
RAW Paste Data