1. #include <CL/cl.h>
  2. #include <cstdlib>
  3. #include <cstdio>
  4. #include <iostream>
  5. #include <vector>
  6.  
  7. const int W = 960; // works with 961 for example...
  8. const int H = 540;
  9.  
  10. const char * kernel =
  11.     "__kernel void bilateralFilter(__read_only image2d_t img, __global float * dst, int width, int height)"
  12.     "{"
  13.         "const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;"
  14.         "const int x = get_global_id(0);"
  15.         "const int y = get_global_id(1);"
  16.         "int2 coord = (int2)(x, y);"
  17.         "float4 sum = read_imagef(img, smp, coord);"
  18.         "const int id = y * width + x;"
  19.         "dst[id*3] = sum.x;"
  20.         "dst[id*3+1] = sum.y;"
  21.         "dst[id*3+2] = sum.z;"
  22.     "}\n";
  23.  
  24. //! Creates a 960x540 rgba int8 texture
  25. static cl_mem createCLTexture(cl_context ctx, cl_int * errcode)
  26. {
  27.     std::vector<unsigned char> data;
  28.     data.resize(W*H*4);
  29.  
  30.     for(int y = 0; y < H; y++)
  31.     {
  32.         for(int x = 0; x < W; x++)
  33.         {
  34.             data[4*(y*W+x)+0] = x % 255; //((x+y) & 0x1) ? 0xFF : 0x0;
  35.             data[4*(y*W+x)+1] = y % 255; //((x+y) & 0x1) ? 0xFF : 0x0;
  36.             data[4*(y*W+x)+2] = ((x+y) & 0x1) ? 0xFF : 0x0;
  37.             data[4*(y*W+x)+3] = ((x+y) & 0x1) ? 0xFF : 0x0;
  38.         }
  39.     }
  40.  
  41.     cl_image_format fmt;
  42.     fmt.image_channel_order = CL_RGBA;
  43.     fmt.image_channel_data_type = CL_UNORM_INT8; //CL_UNSIGNED_INT8;
  44.  
  45.     return clCreateImage2D(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, &fmt, W, H, W*4, &data[0], errcode);
  46. }
  47.  
  48.  
  49. int main (int argc, const char * argv[])
  50. {
  51.     std::cout << kernel << std::endl;
  52.  
  53.     // get platform -------------------------------------------------------------------------------------
  54.     cl_platform_id platform = 0;
  55.     cl_uint numPlatformsFound = 0;
  56.     if(CL_SUCCESS != clGetPlatformIDs(1, &platform, &numPlatformsFound)) {
  57.         std::cout << "Could not query platforms." << std::endl;
  58.         return -1;
  59.     }
  60.     std::cout << "Found " << numPlatformsFound << " Platforms" << std::endl;
  61.     if(0 == numPlatformsFound) return -1;
  62.  
  63.     // get device ---------------------------------------------------------------------------------------
  64.     cl_device_id device = 0;
  65.     cl_uint numDevicesFound = 0;
  66.     if(CL_SUCCESS != clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, &numDevicesFound)) {
  67.         std::cout << "Could not query devices." << std::endl;
  68.         return -1;
  69.     }
  70.     std::cout << "Found " << numDevicesFound << " Devices" << std::endl;
  71.     if(0 == numDevicesFound) return -1;
  72.  
  73.     // create context ------------------------------------------------------------------------------------
  74.     cl_context context = 0;
  75.     cl_int errcode;
  76.     context = clCreateContext(NULL, 1, &device, NULL, NULL, &errcode);
  77.     if(CL_SUCCESS != errcode && 0 != context) {
  78.         std::cout << "Context creation failed!" << std::endl;
  79.         std::cout << "Error code: " << errcode;
  80.         return -1;
  81.     }
  82.     std::cout << "Context created.." << std::endl;
  83.  
  84.  
  85.     // get queue ----------------------------------------------------------------------------------------
  86.     cl_command_queue queue = 0;
  87.     queue = clCreateCommandQueue(context, device, (cl_command_queue_properties)0, &errcode);
  88.     if(CL_SUCCESS != errcode && 0 != queue) {
  89.         std::cout << "Command Queue creation failed!" << std::endl;
  90.         return -1;
  91.     }
  92.     std::cout << "Command Queue created.." << std::endl;
  93.  
  94.  
  95.     // create texture -----------------------------------------------------------------------------------
  96.     cl_mem texture = createCLTexture(context, &errcode);
  97.     if(CL_SUCCESS != errcode && 0 != texture) {
  98.         std::cout << "Texture creation failed!" << std::endl;
  99.         std::cout << "Error code: " << errcode;
  100.         return -1;
  101.     }
  102.  
  103.     cl_mem bufferOut = 0;
  104.     bufferOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float)*W*H*3, 0, &errcode);
  105.     if(CL_SUCCESS != errcode && 0 != bufferOut) {
  106.         std::cout << "Buffer creation failed!" << std::endl;
  107.         return -1;
  108.     }
  109.     std::cout << "Buffer created.." << std::endl;
  110.  
  111.     // create program -----------------------------------------------------------------------------------
  112.     cl_program program = 0;
  113.     program = clCreateProgramWithSource(context, 1, (const char**)&kernel, NULL, &errcode);
  114.     if(CL_SUCCESS != errcode && 0 != program) {
  115.         std::cout << "Program creation failed!" << std::endl;
  116.         return -1;
  117.     }
  118.     std::cout << "Program created.." << std::endl;
  119.  
  120.     // building program ---------------------------------------------------------------------------------
  121.     if(CL_SUCCESS != (errcode = clBuildProgram(program, 0, NULL, NULL, NULL, NULL))) {
  122.         std::cout << "Building programm failed!" << std::endl;
  123.         std::cout << "Error Code: " << errcode << std::endl;
  124.  
  125.         char buffer[2000];
  126.         const char end = '\0';
  127.         size_t bytesRead = 0;
  128.         clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &bytesRead);
  129.  
  130.         std::cout << buffer << std::endl;
  131.  
  132.         return -1;
  133.     }
  134.     std::cout << "Program built.." << std::endl;
  135.  
  136.     // creating kernel ----------------------------------------------------------------------------------
  137.     cl_kernel kernel = 0;
  138.     kernel = clCreateKernel(program, "bilateralFilter", &errcode);
  139.     if(CL_SUCCESS != errcode && 0 != kernel) {
  140.         std::cout << "Kernel creation failed!" << std::endl;
  141.         return -1;
  142.     }
  143.     std::cout << "Kernel created.." << std::endl;
  144.  
  145.     // executing kernel ---------------------------------------------------------------------------------
  146.     errcode = clSetKernelArg(kernel, 0, sizeof(texture), &texture);
  147.     if(CL_SUCCESS != errcode) {
  148.         std::cout << "Could not set kernel argument 0" << std::endl;
  149.         std::cout << "Error Code: " << errcode;
  150.         return -1;
  151.     }
  152.  
  153.     if(CL_SUCCESS != clSetKernelArg(kernel, 1, sizeof(bufferOut), &bufferOut)) {
  154.         std::cout << "Could not set kernel argument 1" << std::endl;
  155.     }
  156.  
  157.     if(CL_SUCCESS != clSetKernelArg(kernel, 2, sizeof(W), &W)) {
  158.         std::cout << "Could not set kernel argument 2" << std::endl;
  159.     }
  160.  
  161.     if(CL_SUCCESS != clSetKernelArg(kernel, 3, sizeof(H), &H)) {
  162.         std::cout << "Could not set kernel argument 3" << std::endl;
  163.     }
  164.  
  165.     size_t global_dimensions[] = { W, H, 0 };
  166.  
  167.     errcode = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_dimensions, 0, 0, NULL, NULL);
  168.     if(CL_SUCCESS != errcode) {
  169.         std::cout << "Could not enqueue kernel" << std::endl;
  170.         std::cout << "Error code: " << errcode << std::endl;
  171.         return -1;
  172.     }
  173.  
  174.     std::cout << "Kernel enqueued.." << std::endl;
  175.  
  176.     errcode = clFinish(queue);
  177.     std::cout << "Kernel executed.." << std::endl;
  178.  
  179.     if(CL_SUCCESS != errcode) {
  180.         std::cout << "Could not measure time because clFinish() failed!" << std::endl;
  181.         std::cout << "Error code: " << errcode << std::endl;
  182.         return -1;
  183.     }
  184.  
  185.     std::cout << "Reading back results.." << std::endl;
  186.     // Read back the results (note that this read is blocking since argument 3 is true)
  187.     std::vector<float> data;
  188.     data.resize(W*H*3);
  189.     clEnqueueReadBuffer(queue, bufferOut, CL_TRUE, 0, sizeof(cl_float)*data.size(), &data[0], 0, NULL, NULL);
  190.  
  191.     // Clean up -----------------------------------------------------------------------------------------
  192.     clReleaseMemObject(texture);
  193.     clReleaseMemObject(bufferOut);
  194.     clReleaseKernel(kernel);
  195.     clReleaseProgram(program);
  196.     clReleaseCommandQueue(queue);
  197.     clReleaseContext(context);
  198.  
  199.     std::cout << "Writing output.." << std::endl;
  200.  
  201.     // Print out the results ----------------------------------------------------------------------------
  202.     FILE * f = fopen("out.ppm", "wb");
  203.     fprintf(f, "P6\n%d %d\n255\n", W, H);
  204.     for(int y = 0; y < H; y++)
  205.     {
  206.         for(int x = 0; x < W; x++)
  207.         {
  208.             unsigned char color[3];
  209.             color[0] = (unsigned char)(data[3*(y*W+x)+0]*255.0f);
  210.             color[1] = (unsigned char)(data[3*(y*W+x)+1]*255.0f);
  211.             color[2] = (unsigned char)(data[3*(y*W+x)+2]*255.0f);
  212.             fwrite(color, 1, 3, f);
  213.         }
  214.     }
  215.     fclose(f);
  216.  
  217.     std::cout << "ALL DONE!" << std::endl;
  218. }