Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- bool Model::filterWorker(std::vector<cv::Mat> &inputPlanes,
- std::vector<cv::Mat> &weightMatrices,
- std::vector<cv::Mat> &outputPlanes)
- {
- cv::ocl::setUseOpenCL(false); // disable OpenCL Support(temporary)
- cv::Size ipSize = inputPlanes[0].size();
- cv::Size wmSize = weightMatrices[0].size();
- // filter processing
- // input : inputPlanes
- // kernel : weightMatrices
- cl_uint planesCount = inputPlanes.size();
- size_t ipSizeArea = ipSize.area();
- size_t wmSizeArea = wmSize.area();
- size_t inputPlanesSize = inputPlanes.size() * ipSize.area();
- size_t weightMatricesSize = inputPlanes.size() * wmSize.area();
- cl_mem memPlanes = clCreateBuffer(context, CL_MEM_READ_ONLY, inputPlanesSize * sizeof(float), nullptr, nullptr);
- cl_mem memMatrices = clCreateBuffer(context, CL_MEM_READ_ONLY, weightMatricesSize * sizeof(float), nullptr, nullptr);
- cl_mem memOutput = clCreateBuffer(context, CL_MEM_WRITE_ONLY, ipSizeArea * sizeof(float), nullptr, nullptr);
- clSetKernelArg(Model::convKernel, 0, sizeof(cl_mem), &memPlanes);
- clSetKernelArg(Model::convKernel, 1, sizeof(cl_mem), &memMatrices);
- clSetKernelArg(Model::convKernel, 2, sizeof(cl_mem), &memOutput);
- clSetKernelArg(Model::convKernel, 3, sizeof(cl_uint), &planesCount);
- float *inputPlanesArr = new float[inputPlanesSize];
- float *weightMatricesArr = new float[weightMatricesSize];
- cv::Mat intermediatePlane = cv::Mat::zeros(ipSize, CV_32FC1);
- /*const size_t globalWorkSize[] =
- {
- (ipSize.width % 8 != 0 ? 8 * (ipSize.width / 8 + 1) : ipSize.width),
- (ipSize.height % 8 != 0 ? 8 * (ipSize.height / 8 + 1) : ipSize.height)
- };*/
- const size_t localWorkSize[] = { 8, 8 };
- const size_t globalWorkSize[] =
- {
- ipSize.width,
- ipSize.height
- };
- for (unsigned int opIndex = 0; opIndex < outputPlanes.size(); opIndex++)
- {
- unsigned int wMatIndex = nInputPlanes * opIndex;
- for (int ipIndex = 0; ipIndex < nInputPlanes; ipIndex++)
- {
- for (size_t i = 0; i < ipSize.height; ++i)
- {
- for (size_t j = 0; j < ipSize.width; ++j)
- {
- inputPlanesArr[ipSizeArea * ipIndex + i * ipSize.width + j] = inputPlanes[ipIndex].at<float>(i, j);
- }
- }
- for (size_t i = 0; i < wmSize.height; ++i)
- {
- for (size_t j = 0; j < wmSize.width; ++j)
- {
- weightMatricesArr[wmSizeArea * ipIndex + i * wmSize.width + j] = weightMatrices[wMatIndex + ipIndex].at<float>(i, j);
- }
- }
- }
- cl_int err = clEnqueueWriteBuffer(queue, memPlanes, CL_FALSE, 0, inputPlanesSize * sizeof(float), inputPlanesArr, NULL, nullptr, nullptr);
- //std::cout << "err " << err << " ";
- err = clEnqueueWriteBuffer(queue, memMatrices, CL_FALSE, 0, weightMatricesSize * sizeof(float), weightMatricesArr, NULL, nullptr, nullptr);
- //std::cout << err << " ";
- //volatile size_t ipSizeW = ipSize.width;
- //volatile size_t ipSizeH = ipSize.height;
- //err = clEnqueueNDRangeKernel(queue, Model::convKernel, 2, NULL, globalWorkSize, localWorkSize, NULL, nullptr, nullptr);
- err = clEnqueueNDRangeKernel(queue, Model::convKernel, 2, NULL, globalWorkSize, nullptr, NULL, nullptr, nullptr);
- //std::cout << err << " ";
- err = clEnqueueReadBuffer(queue, memOutput, CL_TRUE, 0, ipSizeArea * sizeof(float), (float *)intermediatePlane.data, NULL, nullptr, nullptr);
- //std::cout << err << std::endl;
- cv::add(intermediatePlane, biases[opIndex], intermediatePlane);
- cv::UMat moreThanZero = cv::UMat(ipSize, CV_32FC1, 0.0);
- cv::UMat lessThanZero = cv::UMat(ipSize, CV_32FC1, 0.0);
- cv::max(intermediatePlane, 0.0, moreThanZero);
- cv::min(intermediatePlane, 0.0, lessThanZero);
- cv::scaleAdd(lessThanZero, 0.1, moreThanZero, intermediatePlane);
- intermediatePlane.copyTo(outputPlanes[opIndex]);
- } // for index
- delete[] weightMatricesArr;
- delete[] inputPlanesArr;
- clReleaseMemObject(memPlanes);
- clReleaseMemObject(memMatrices);
- clReleaseMemObject(memOutput);
- return true;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement