Guest User

Simple OpenCL program

a guest
Oct 22nd, 2020
55
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #include <stdio.h>
  #include <stdlib.h>
  #include "CL/cl.h"
  3.  
  4. #define DATA_SIZE 100
  5.  
  /*
   * OpenCL C source for the "hello" kernel, compiled at run time.
   * Each work-item reads the element of `input` at its global id (dimension 0),
   * multiplies it by 5 and stores the product at the same index of `output`.
   */
  const char *KernelSource =
      "__kernel void hello(__global float *input, __global float *output)\n{\n"
      "  size_t id = get_global_id(0);\n"
      "  output[id] = 5*input[id];\n"
      "}\n\n";
  13.  
  14. int main(void) {
  15.     cl_context context;
  16.     cl_context_properties properties[3];
  17.     cl_kernel kernel;
  18.     cl_command_queue command_queue;
  19.     cl_program program;
  20.     cl_int err;
  21.     cl_uint num_of_platforms=0;
  22.     cl_platform_id platform_id;
  23.     cl_device_id device_id;
  24.     cl_uint num_of_devices=0;
  25.     cl_mem input, output;
  26.     size_t global;
  27.     float *buffer;
  28.     int i;
  29.  
  30.     buffer = malloc (sizeof (float) * DATA_SIZE);
  31.     for (i=0; i<DATA_SIZE; i++) buffer[i] = i;
  32.    
  33.     // retreives a list of platforms available
  34.     if (clGetPlatformIDs(1, &platform_id, &num_of_platforms)!= CL_SUCCESS) {
  35.         printf("Unable to get platform_id\n");
  36.         return 1;
  37.     }
  38.  
  39.     // try to get a supported GPU device
  40.     if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id,
  41.                        &num_of_devices) != CL_SUCCESS)
  42.     {
  43.         printf("Unable to get device_id\n");
  44.         return 1;
  45.     }
  46.  
  47.     // context properties list - must be terminated with 0
  48.     properties[0]= CL_CONTEXT_PLATFORM;
  49.     properties[1]= (cl_context_properties) platform_id;
  50.     properties[2]= 0;
  51.    
  52.     // create a context with the GPU device
  53.     context = clCreateContext(properties,1,&device_id,NULL,NULL,&err);
  54.  
  55.     // create command queue using the context and device
  56.     command_queue = clCreateCommandQueue(context, device_id, 0, &err);
  57.     program = clCreateProgramWithSource(context,1,(const char **)&KernelSource, NULL, &err);
  58.  
  59.     // compile the program
  60.     if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS) {
  61.         printf("Error building program\n");
  62.         return 1;
  63.     }
  64.  
  65.     // specify which kernel from the program to execute
  66.     kernel = clCreateKernel(program, "hello", &err);
  67.  
  68.     // create buffers for the input and ouput
  69.     input  = clCreateBuffer(context, CL_MEM_READ_ONLY,  sizeof(float) * DATA_SIZE, NULL, NULL);
  70.     output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
  71.  
  72.     // load data into the input buffer
  73.     clEnqueueWriteBuffer(command_queue, input, CL_TRUE, 0,
  74.                          sizeof(float) * DATA_SIZE, buffer, 0, NULL, NULL);
  75.  
  76.     // set the argument list for the kernel command
  77.     clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
  78.     clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
  79.     global=DATA_SIZE;
  80.  
  81.     // enqueue the kernel command for execution
  82.     clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
  83.     clFinish(command_queue);
  84.  
  85.     // copy the results from out of the output
  86.     clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float) * DATA_SIZE,
  87.                         buffer, 0, NULL, NULL);
  88.  
  89.     // print the results
  90.     printf("output: ");
  91.     for(i=0; i<DATA_SIZE; i++) printf("%f ",buffer[i]);
  92.     printf ("\n");
  93.  
  94.     // cleanup - release OpenCL resources
  95.     clReleaseMemObject(input);
  96.     clReleaseMemObject(output);
  97.     clReleaseProgram(program);
  98.     clReleaseKernel(kernel);
  99.     clReleaseCommandQueue(command_queue);
  100.     clReleaseContext(context);
  101.     free (buffer);
  102.  
  103.     return 0;
  104. }
  105.  
RAW Paste Data