Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2017
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.82 KB | None | 0 0
  1. bool Model::filterWorker(std::vector<cv::Mat> &inputPlanes,
  2.     std::vector<cv::Mat> &weightMatrices,
  3.     std::vector<cv::Mat> &outputPlanes)
  4. {
  5.     cv::ocl::setUseOpenCL(false); // disable OpenCL Support(temporary)
  6.  
  7.     cv::Size ipSize = inputPlanes[0].size();
  8.     cv::Size wmSize = weightMatrices[0].size();
  9.     // filter processing
  10.     // input : inputPlanes
  11.     // kernel : weightMatrices
  12.     cl_uint planesCount = inputPlanes.size();
  13.     size_t ipSizeArea = ipSize.area();
  14.     size_t wmSizeArea = wmSize.area();
  15.     size_t inputPlanesSize = inputPlanes.size() * ipSize.area();
  16.     size_t weightMatricesSize = inputPlanes.size() * wmSize.area();
  17.     cl_mem memPlanes = clCreateBuffer(context, CL_MEM_READ_ONLY, inputPlanesSize * sizeof(float), nullptr, nullptr);
  18.     cl_mem memMatrices = clCreateBuffer(context, CL_MEM_READ_ONLY, weightMatricesSize * sizeof(float), nullptr, nullptr);
  19.     cl_mem memOutput = clCreateBuffer(context, CL_MEM_WRITE_ONLY, ipSizeArea * sizeof(float), nullptr, nullptr);
  20.     clSetKernelArg(Model::convKernel, 0, sizeof(cl_mem), &memPlanes);
  21.     clSetKernelArg(Model::convKernel, 1, sizeof(cl_mem), &memMatrices);
  22.     clSetKernelArg(Model::convKernel, 2, sizeof(cl_mem), &memOutput);
  23.     clSetKernelArg(Model::convKernel, 3, sizeof(cl_uint), &planesCount);
  24.     float *inputPlanesArr = new float[inputPlanesSize];
  25.     float *weightMatricesArr = new float[weightMatricesSize];
  26.     cv::Mat intermediatePlane = cv::Mat::zeros(ipSize, CV_32FC1);
  27.     /*const size_t globalWorkSize[] =
  28.     {
  29.         (ipSize.width % 8 != 0 ? 8 * (ipSize.width / 8 + 1) : ipSize.width),
  30.         (ipSize.height % 8 != 0 ? 8 * (ipSize.height / 8 + 1) : ipSize.height)
  31.     };*/
  32.     const size_t localWorkSize[] = { 8, 8 };
  33.     const size_t globalWorkSize[] =
  34.     {
  35.         ipSize.width,
  36.         ipSize.height
  37.     };
  38.     for (unsigned int opIndex = 0; opIndex < outputPlanes.size(); opIndex++)
  39.     {
  40.         unsigned int wMatIndex = nInputPlanes * opIndex;
  41.         for (int ipIndex = 0; ipIndex < nInputPlanes; ipIndex++)
  42.         {
  43.             for (size_t i = 0; i < ipSize.height; ++i)
  44.             {
  45.                 for (size_t j = 0; j < ipSize.width; ++j)
  46.                 {
  47.                     inputPlanesArr[ipSizeArea * ipIndex + i * ipSize.width + j] = inputPlanes[ipIndex].at<float>(i, j);
  48.                 }
  49.             }
  50.             for (size_t i = 0; i < wmSize.height; ++i)
  51.             {
  52.                 for (size_t j = 0; j < wmSize.width; ++j)
  53.                 {
  54.                     weightMatricesArr[wmSizeArea * ipIndex + i * wmSize.width + j] = weightMatrices[wMatIndex + ipIndex].at<float>(i, j);
  55.                 }
  56.             }
  57.         }
  58.         cl_int err = clEnqueueWriteBuffer(queue, memPlanes, CL_FALSE, 0, inputPlanesSize * sizeof(float), inputPlanesArr, NULL, nullptr, nullptr);
  59.         //std::cout << "err " << err << " ";
  60.         err = clEnqueueWriteBuffer(queue, memMatrices, CL_FALSE, 0, weightMatricesSize * sizeof(float), weightMatricesArr, NULL, nullptr, nullptr);
  61.         //std::cout << err << " ";
  62.         //volatile size_t ipSizeW = ipSize.width;
  63.         //volatile size_t ipSizeH = ipSize.height;
  64.         //err = clEnqueueNDRangeKernel(queue, Model::convKernel, 2, NULL, globalWorkSize, localWorkSize, NULL, nullptr, nullptr);
  65.         err = clEnqueueNDRangeKernel(queue, Model::convKernel, 2, NULL, globalWorkSize, nullptr, NULL, nullptr, nullptr);
  66.         //std::cout << err << " ";
  67.         err = clEnqueueReadBuffer(queue, memOutput, CL_TRUE, 0, ipSizeArea * sizeof(float), (float *)intermediatePlane.data, NULL, nullptr, nullptr);
  68.         //std::cout << err << std::endl;
  69.         cv::add(intermediatePlane, biases[opIndex], intermediatePlane);
  70.         cv::UMat moreThanZero = cv::UMat(ipSize, CV_32FC1, 0.0);
  71.         cv::UMat lessThanZero = cv::UMat(ipSize, CV_32FC1, 0.0);
  72.         cv::max(intermediatePlane, 0.0, moreThanZero);
  73.         cv::min(intermediatePlane, 0.0, lessThanZero);
  74.         cv::scaleAdd(lessThanZero, 0.1, moreThanZero, intermediatePlane);
  75.         intermediatePlane.copyTo(outputPlanes[opIndex]);
  76.     } // for index
  77.     delete[] weightMatricesArr;
  78.     delete[] inputPlanesArr;
  79.     clReleaseMemObject(memPlanes);
  80.     clReleaseMemObject(memMatrices);
  81.     clReleaseMemObject(memOutput);
  82.     return true;
  83. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement