Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Jan 31st, 2013  |  syntax: C++  |  size: 3.04 KB  |  views: 74  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  #include <string.h>
  #include <windows.h>

  #include <fstream>
  #include <iostream>
  #include <iterator>
  #include <memory>
  #include <string>
  #include <vector>

  #include <cl.h>

  #include "profiler.h"

  #pragma comment (lib, "opencl.lib")
  8.  
  // Number of float elements in each host vector (0x4000000 = 64 Mi elements).
  size_t var_size = 0x4000000;

  // Size of one vector in bytes. Stored as size_t (it was an int) so the byte
  // count is never narrowed: every consumer passes it to a size_t parameter,
  // and an int would silently truncate once var_size reaches 2^29 elements.
  size_t mem_size = sizeof(float) * var_size;

  // Host-side vectors of var_size floats each. outH1 and outH2 hold the two
  // operands and outH3 receives the CPU-computed sum. They are allocated once
  // during static initialisation and are not released anywhere in this file.
  float *outH1 = new float[var_size];
  float *outH2 = new float[var_size];
  float *outH3 = new float[var_size];
  15.  
  16. cl_kernel kernel;
  17. cl_command_queue queue;
  18.  
  19. cl_mem buf1;
  20. cl_mem buf2;
  21. cl_mem buf3;
  22.  
  23. void openClFunc();
  24. void cpuFunc();
  25.  
  26. int main(int argc, char *argv[])
  27. {
  28.         const int platforms = 0xff;
  29.         cl_int error;
  30.         cl_uint num_platforms;
  31.         cl_platform_id platform[platforms];
  32.         error = clGetPlatformIDs(platforms, platform, &num_platforms);
  33.  
  34.         const int devices = 0xff;
  35.         cl_uint num_devices;
  36.         cl_device_id device[devices];
  37.  
  38.         clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, devices, device, &num_devices);
  39.  
  40.         cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform[0], 0};
  41.  
  42.         cl_context context = clCreateContext(cprops, 1, &device[0], 0, 0, &error);
  43.  
  44.         for(int i=0; i<var_size; i++)
  45.         {
  46.                 outH1[i] = 1;
  47.                 outH2[i] = 2;
  48.         }      
  49.  
  50.         buf1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, mem_size, outH1, &error);
  51.         buf2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, mem_size, outH2, &error);
  52.         buf3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, mem_size, 0, &error);
  53.        
  54.         queue = clCreateCommandQueue(context, device[0], 0, &error);
  55.  
  56.         std::ifstream ifs = std::ifstream("VectorAdd.cl");
  57.         bool bopen = ifs.is_open();
  58.         ifs.seekg(0, std::ios::end);
  59.         std::ifstream::pos_type filesize = ifs.tellg();
  60.         ifs.seekg(0, std::ios::beg);
  61.         char* progs[1];
  62.         char prog[0xffff];
  63.         progs[0] = prog;
  64.         memset(prog, 0, 0xffff);
  65.         ifs.read(prog, filesize);
  66.  
  67.         cl_program program = clCreateProgramWithSource(context, 1, (const char**)progs,  0, &error);
  68.  
  69.         char* build_log;
  70.         size_t log_size;
  71.  
  72.         cl_int res = clBuildProgram(program, 1, device, 0, 0, 0);
  73.  
  74.         clGetProgramBuildInfo(program, device[0],CL_PROGRAM_BUILD_LOG, 0, 0, &log_size);
  75.  
  76.         build_log = new char[log_size+1];
  77.  
  78.         clGetProgramBuildInfo(program, device[0],CL_PROGRAM_BUILD_LOG, log_size, build_log, &log_size);
  79.  
  80.         OutputDebugString(build_log);
  81.  
  82.         kernel = clCreateKernel(program, "vector_add_gpu", &error);
  83.  
  84.         res = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf1);
  85.         res = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buf2);
  86.         res = clSetKernelArg(kernel, 2, sizeof(cl_mem), &buf3);
  87.  
  88.         int size = var_size;
  89.         res = clSetKernelArg(kernel, 3, sizeof(cl_int), &size);
  90.  
  91.         std::cout << "start" << std::endl;
  92.         cpuFunc();
  93.         openClFunc();
  94. }
  95.  
  96. void openClFunc()
  97. {
  98.         Profile p("openClFunc");
  99.         const size_t local_ws = 1;
  100.         const size_t global_ws = var_size;
  101.  
  102.         cl_int res1 = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &global_ws, &local_ws, 0, 0, 0);
  103.  
  104.         //CL_SUCCESS
  105.  
  106.         // Reading back
  107.         float* check = new float[var_size];
  108.         cl_int res2 = clEnqueueReadBuffer(queue, buf3, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
  109. }
  110.  
  111.  
  112. void cpuFunc()
  113. {
  114.         Profile p("cpuFunc");
  115.         for(int i=0; i<var_size; i++)
  116.         {
  117.                 outH3[i] = outH1[i] + outH2[i];
  118.         }
  119. }
clone this paste RAW Paste Data