Advertisement
Guest User

Untitled

a guest
Jan 17th, 2017
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.61 KB | None | 0 0
  1. #define _CRT_SECURE_NO_WARNINGS
  2. #include <cstdio>
  3. #include <cstdlib>
  4.  
  5. #ifdef __APPLE__
  6. #include <OpenCL/opencl.h>
  7. #else
  8. #include <CL/cl.h>
  9. #endif
  10.  
  11. #define CHECK_ERROR(X) __checkError(__FILE__, __LINE__, X)
  12. inline void __checkError(const char *file, int line, cl_int error) {
  13.   if (error != CL_SUCCESS) {
  14.     printf("error %i in file %s, line %i\n", error, file, line);
  15.     exit(EXIT_FAILURE);
  16.   }
  17. }
  18.  
  19. void helloOpenCL(const int *in, int *out, int count) {
  20.   cl_device_id device_id = NULL;
  21.   cl_platform_id platform_id = NULL;
  22.   cl_uint ret_num_devices;
  23.   cl_uint ret_num_platforms;
  24.   cl_int err = CL_SUCCESS;
  25.   const int MAX_SOURCE_SIZE = 1e8;
  26.  
  27.   FILE *fp;
  28.   char fileName[] = "./kernel.cl";
  29.   char *source_str;
  30.   size_t source_size;
  31.  
  32.   /* Load the source code containing the kernel*/
  33.   fp = fopen(fileName, "r");
  34.   if (!fp) {
  35.     fprintf(stderr, "Failed to load kernel.\n");
  36.     exit(1);
  37.   }
  38.   source_str = new char[MAX_SOURCE_SIZE];
  39.   source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
  40.   fclose(fp);
  41.  
  42.   /* Get Platform and Device Info */
  43.   cl_platform_id *platforms = new cl_platform_id[2];
  44.   err = clGetPlatformIDs(2, platforms, &ret_num_platforms);
  45.   CHECK_ERROR(err);
  46.   err = clGetDeviceIDs(platforms[1], CL_DEVICE_TYPE_GPU, 1, &device_id,
  47.                        &ret_num_devices);
  48.   CHECK_ERROR(err);
  49.  
  50.   char vendor_name[1024];
  51.   err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name),
  52.       &vendor_name, NULL);
  53.   printf("%s ", vendor_name);
  54.   CHECK_ERROR(err);
  55.  
  56.   /* Create OpenCL context */
  57.   cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
  58.   CHECK_ERROR(err);
  59.  
  60.   /* Create Command Queue */
  61.   cl_command_queue command_queue =
  62.       clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err);
  63.   CHECK_ERROR(err);
  64.  
  65.   /* Create Kernel Program from the source */
  66.   cl_program program =
  67.       clCreateProgramWithSource(context, 1, (const char **)&source_str,
  68.                                 (const size_t *)&source_size, &err);
  69.   CHECK_ERROR(err);
  70.  
  71.   /* Build Kernel Program */
  72.   err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
  73.   if (err != CL_SUCCESS) {
  74.     size_t len;
  75.     char buffer[2048];
  76.     printf("Error: Failed to build program executable!\n");
  77.     clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
  78.                           sizeof(buffer), buffer, &len);
  79.     printf("%s\n", buffer);
  80.     exit(EXIT_FAILURE);
  81.   }
  82.  
  83.   /* Create OpenCL Kernel */
  84.   cl_kernel kernel = clCreateKernel(program, "helloOpenCLKernel", &err);
  85.   CHECK_ERROR(err);
  86.  
  87.   /* Create Memory Buffer */
  88.   cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY,
  89.                                 count * sizeof(int), NULL, &err);
  90.   CHECK_ERROR(err);
  91.   cl_mem output = clCreateBuffer(context, CL_MEM_READ_WRITE,
  92.                                  count * sizeof(int), NULL, &err);
  93.   CHECK_ERROR(err);
  94.  
  95.   // Copy data to the device memory
  96.   err = clEnqueueWriteBuffer(command_queue, input, CL_TRUE, 0,
  97.                              sizeof(int) * count, in, 0, NULL, NULL);
  98.   CHECK_ERROR(err);
  99.  
  100.   /* Set OpenCL Kernel Parameters */
  101.   err = 0;
  102.   err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
  103.   err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
  104.   err |= clSetKernelArg(kernel, 2, sizeof(int), &count);
  105.   CHECK_ERROR(err);
  106.  
  107.   size_t global_item_size = count;
  108.   size_t local_item_size = 512;
  109.  
  110.   /* Execute OpenCL kernel as data parallel */
  111.   cl_event prof_event;
  112.   err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size,
  113.                                &local_item_size, 0, NULL, &prof_event);
  114.   CHECK_ERROR(err);
  115.  
  116.   // Wait for the commands to get serviced before reading back results
  117.   err = clFinish(command_queue);
  118.   CHECK_ERROR(err);
  119.  
  120.   // Calculate time taken by the kernel:
  121.   cl_ulong ev_start_time = (cl_ulong)0;
  122.   cl_ulong ev_end_time = (cl_ulong)0;
  123.   err = clWaitForEvents(1, &prof_event);
  124.   err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL);
  125.   err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
  126.   float run_time_gpu = (float)(ev_end_time - ev_start_time) / 1000000.f;
  127.   printf("run time = %0.3fms\n", run_time_gpu);
  128.   CHECK_ERROR(err);
  129.  
  130.   //(6) Read back the results from the device to verify the output
  131.   err = clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0,
  132.                             sizeof(int) * count, out, 0, NULL, NULL);
  133.   CHECK_ERROR(err);
  134.  
  135.   int counter = 0;
  136.   for (int i = 1; i < count; i++) {
  137.       if (out[i] == in[i] - in[i - 1]) {
  138.           counter++;
  139.       }
  140.   }
  141.   printf("%d %d\n", count, counter);
  142.   /* Finalization */
  143.   err = clFlush(command_queue);
  144.   err |= clReleaseMemObject(input);
  145.   err | clReleaseMemObject(output);
  146.   err |= clReleaseKernel(kernel);
  147.   err |= clReleaseProgram(program);
  148.   err |= clReleaseCommandQueue(command_queue);
  149.   err |= clReleaseContext(context);
  150.   CHECK_ERROR(err);
  151.   delete[] source_str;
  152. }
  153.  
  154. #include <ctime>
  155. int main() {
  156.   const int sz = 1024*1024*16;
  157.   int *input = new int[sz];
  158.   int *output = new int[sz];
  159.   srand(time(NULL));
  160.   for (int i = 1; i <= sz; i++) {
  161.     input[i - 1] = i + i * (i & 2);
  162.     //printf("input[%d] = %f\n", i - 1, input[i - 1]);
  163.   }
  164.  
  165.   helloOpenCL(input, output, sz);
  166.  
  167.   delete[] input;
  168.   delete[] output;
  169.   return 0;
  170. }
  171.  
  172.  
  173.  
  174.  
  175.  
  176.  
  177.  
  178.  
  179.  
  180.  
  181.  
  182.  
  183.  
  184. //// kernel.cl
  185. #define BLOCK_SIZE 512
  186. #define global __global
  187. #define local __local
  188. /*
  189. const int globalId = get_global_id(0);
  190. if (globalId >= count) {
  191. return;
  192. }
  193.  
  194. const int localId = get_local_id(0);
  195. local int sharedData[BLOCK_SIZE]; //compile-time vs run-time
  196.  
  197. sharedData[localId] = in[globalId];
  198.  
  199. barrier(CLK_LOCAL_MEM_FENCE);
  200.  
  201. if (localId != 0) {
  202. out[globalId] = sharedData[localId] - sharedData[localId - 1];
  203. }
  204. else if (globalId != 0) {
  205. out[globalId] = sharedData[localId] - in[globalId - 1];
  206. }
  207.  
  208.  
  209. const int globalId = get_global_id(0);
  210.  
  211. if (globalId > 0) {
  212.  
  213. int curr = in[globalId];
  214. int prev = in[globalId - 1];
  215.  
  216. out[globalId] = curr - prev;
  217. }
  218.  
  219. */
  /*
   * Adjacent difference: for every globalId in [1, count),
   *   out[globalId] = in[globalId] - in[globalId - 1].
   * out[0] is never written.
   *
   * Each work-item stages its input element in local memory so that its
   * right-hand neighbour in the same work-group can read it from there. The
   * first work-item of a group reads its predecessor from global memory.
   *
   * The work-group size must not exceed BLOCK_SIZE.
   *
   * Out-of-range work-items must NOT return before the barrier: a barrier has
   * to be reached by every work-item of the work-group, otherwise behaviour is
   * undefined. They skip the load and leave only after the barrier.
   */
  __kernel void helloOpenCLKernel(__global const int *in, __global int *out,
                                  int count) {
      const int globalId = get_global_id(0);
      const int localId = get_local_id(0);
      const int inRange = globalId < count;

      __local int sharedData[BLOCK_SIZE]; //compile-time vs run-time

      if (inRange) {
          sharedData[localId] = in[globalId];
      }

      barrier(CLK_LOCAL_MEM_FENCE);

      if (!inRange) {
          return;
      }

      if (localId != 0) {
          out[globalId] = sharedData[localId] - sharedData[localId - 1];
      }
      else if (globalId != 0) {
          out[globalId] = sharedData[localId] - in[globalId - 1];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement