#include #include #include #include #include const int W = 960; // works with 961 for example... const int H = 540; const char * kernel = "__kernel void bilateralFilter(__read_only image2d_t img, __global float * dst, int width, int height)" "{" "const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;" "const int x = get_global_id(0);" "const int y = get_global_id(1);" "int2 coord = (int2)(x, y);" "float4 sum = read_imagef(img, smp, coord);" "const int id = y * width + x;" "dst[id*3] = sum.x;" "dst[id*3+1] = sum.y;" "dst[id*3+2] = sum.z;" "}\n"; //! Creates a 960x540 rgba int8 texture static cl_mem createCLTexture(cl_context ctx, cl_int * errcode) { std::vector data; data.resize(W*H*4); for(int y = 0; y < H; y++) { for(int x = 0; x < W; x++) { data[4*(y*W+x)+0] = x % 255; //((x+y) & 0x1) ? 0xFF : 0x0; data[4*(y*W+x)+1] = y % 255; //((x+y) & 0x1) ? 0xFF : 0x0; data[4*(y*W+x)+2] = ((x+y) & 0x1) ? 0xFF : 0x0; data[4*(y*W+x)+3] = ((x+y) & 0x1) ? 0xFF : 0x0; } } cl_image_format fmt; fmt.image_channel_order = CL_RGBA; fmt.image_channel_data_type = CL_UNORM_INT8; //CL_UNSIGNED_INT8; return clCreateImage2D(ctx, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, &fmt, W, H, W*4, &data[0], errcode); } int main (int argc, const char * argv[]) { std::cout << kernel << std::endl; // get platform ------------------------------------------------------------------------------------- cl_platform_id platform = 0; cl_uint numPlatformsFound = 0; if(CL_SUCCESS != clGetPlatformIDs(1, &platform, &numPlatformsFound)) { std::cout << "Could not query platforms." << std::endl; return -1; } std::cout << "Found " << numPlatformsFound << " Platforms" << std::endl; if(0 == numPlatformsFound) return -1; // get device --------------------------------------------------------------------------------------- cl_device_id device = 0; cl_uint numDevicesFound = 0; if(CL_SUCCESS != clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, &numDevicesFound)) { std::cout << "Could not query devices." << std::endl; return -1; } std::cout << "Found " << numDevicesFound << " Devices" << std::endl; if(0 == numDevicesFound) return -1; // create context ------------------------------------------------------------------------------------ cl_context context = 0; cl_int errcode; context = clCreateContext(NULL, 1, &device, NULL, NULL, &errcode); if(CL_SUCCESS != errcode && 0 != context) { std::cout << "Context creation failed!" << std::endl; std::cout << "Error code: " << errcode; return -1; } std::cout << "Context created.." << std::endl; // get queue ---------------------------------------------------------------------------------------- cl_command_queue queue = 0; queue = clCreateCommandQueue(context, device, (cl_command_queue_properties)0, &errcode); if(CL_SUCCESS != errcode && 0 != queue) { std::cout << "Command Queue creation failed!" << std::endl; return -1; } std::cout << "Command Queue created.." << std::endl; // create texture ----------------------------------------------------------------------------------- cl_mem texture = createCLTexture(context, &errcode); if(CL_SUCCESS != errcode && 0 != texture) { std::cout << "Texture creation failed!" << std::endl; std::cout << "Error code: " << errcode; return -1; } cl_mem bufferOut = 0; bufferOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float)*W*H*3, 0, &errcode); if(CL_SUCCESS != errcode && 0 != bufferOut) { std::cout << "Buffer creation failed!" << std::endl; return -1; } std::cout << "Buffer created.." << std::endl; // create program ----------------------------------------------------------------------------------- cl_program program = 0; program = clCreateProgramWithSource(context, 1, (const char**)&kernel, NULL, &errcode); if(CL_SUCCESS != errcode && 0 != program) { std::cout << "Program creation failed!" << std::endl; return -1; } std::cout << "Program created.." << std::endl; // building program --------------------------------------------------------------------------------- if(CL_SUCCESS != (errcode = clBuildProgram(program, 0, NULL, NULL, NULL, NULL))) { std::cout << "Building programm failed!" << std::endl; std::cout << "Error Code: " << errcode << std::endl; char buffer[2000]; const char end = '\0'; size_t bytesRead = 0; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &bytesRead); std::cout << buffer << std::endl; return -1; } std::cout << "Program built.." << std::endl; // creating kernel ---------------------------------------------------------------------------------- cl_kernel kernel = 0; kernel = clCreateKernel(program, "bilateralFilter", &errcode); if(CL_SUCCESS != errcode && 0 != kernel) { std::cout << "Kernel creation failed!" << std::endl; return -1; } std::cout << "Kernel created.." << std::endl; // executing kernel --------------------------------------------------------------------------------- errcode = clSetKernelArg(kernel, 0, sizeof(texture), &texture); if(CL_SUCCESS != errcode) { std::cout << "Could not set kernel argument 0" << std::endl; std::cout << "Error Code: " << errcode; return -1; } if(CL_SUCCESS != clSetKernelArg(kernel, 1, sizeof(bufferOut), &bufferOut)) { std::cout << "Could not set kernel argument 1" << std::endl; } if(CL_SUCCESS != clSetKernelArg(kernel, 2, sizeof(W), &W)) { std::cout << "Could not set kernel argument 2" << std::endl; } if(CL_SUCCESS != clSetKernelArg(kernel, 3, sizeof(H), &H)) { std::cout << "Could not set kernel argument 3" << std::endl; } size_t global_dimensions[] = { W, H, 0 }; errcode = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_dimensions, 0, 0, NULL, NULL); if(CL_SUCCESS != errcode) { std::cout << "Could not enqueue kernel" << std::endl; std::cout << "Error code: " << errcode << std::endl; return -1; } std::cout << "Kernel enqueued.." << std::endl; errcode = clFinish(queue); std::cout << "Kernel executed.." << std::endl; if(CL_SUCCESS != errcode) { std::cout << "Could not measure time because clFinish() failed!" << std::endl; std::cout << "Error code: " << errcode << std::endl; return -1; } std::cout << "Reading back results.." << std::endl; // Read back the results (note that this read is blocking since argument 3 is true) std::vector data; data.resize(W*H*3); clEnqueueReadBuffer(queue, bufferOut, CL_TRUE, 0, sizeof(cl_float)*data.size(), &data[0], 0, NULL, NULL); // Clean up ----------------------------------------------------------------------------------------- clReleaseMemObject(texture); clReleaseMemObject(bufferOut); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); std::cout << "Writing output.." << std::endl; // Print out the results ---------------------------------------------------------------------------- FILE * f = fopen("out.ppm", "wb"); fprintf(f, "P6\n%d %d\n255\n", W, H); for(int y = 0; y < H; y++) { for(int x = 0; x < W; x++) { unsigned char color[3]; color[0] = (unsigned char)(data[3*(y*W+x)+0]*255.0f); color[1] = (unsigned char)(data[3*(y*W+x)+1]*255.0f); color[2] = (unsigned char)(data[3*(y*W+x)+2]*255.0f); fwrite(color, 1, 3, f); } } fclose(f); std::cout << "ALL DONE!" << std::endl; }