Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cl.h>
#include <string.h>
#include <windows.h>

#include <fstream>
#include <iostream>
#include <memory>
#include <string>

#include "profiler.h"

#pragma comment (lib, "opencl.lib")
- size_t var_size = 0x4000000;
- int mem_size = sizeof(float)*var_size;
- float *outH1 = new float[var_size];
- float *outH2 = new float[var_size];
- float *outH3 = new float[var_size];
- cl_kernel kernel;
- cl_command_queue queue;
- cl_mem buf1;
- cl_mem buf2;
- cl_mem buf3;
- void openClFunc();
- void cpuFunc();
- int main(int argc, char *argv[])
- {
- const int platforms = 0xff;
- cl_int error;
- cl_uint num_platforms;
- cl_platform_id platform[platforms];
- error = clGetPlatformIDs(platforms, platform, &num_platforms);
- const int devices = 0xff;
- cl_uint num_devices;
- cl_device_id device[devices];
- clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, devices, device, &num_devices);
- cl_context_properties cprops[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform[0], 0};
- cl_context context = clCreateContext(cprops, 1, &device[0], 0, 0, &error);
- for(int i=0; i<var_size; i++)
- {
- outH1[i] = 1;
- outH2[i] = 2;
- }
- buf1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, mem_size, outH1, &error);
- buf2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, mem_size, outH2, &error);
- buf3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, mem_size, 0, &error);
- queue = clCreateCommandQueue(context, device[0], 0, &error);
- std::ifstream ifs = std::ifstream("VectorAdd.cl");
- bool bopen = ifs.is_open();
- ifs.seekg(0, std::ios::end);
- std::ifstream::pos_type filesize = ifs.tellg();
- ifs.seekg(0, std::ios::beg);
- char* progs[1];
- char prog[0xffff];
- progs[0] = prog;
- memset(prog, 0, 0xffff);
- ifs.read(prog, filesize);
- cl_program program = clCreateProgramWithSource(context, 1, (const char**)progs, 0, &error);
- char* build_log;
- size_t log_size;
- cl_int res = clBuildProgram(program, 1, device, 0, 0, 0);
- clGetProgramBuildInfo(program, device[0],CL_PROGRAM_BUILD_LOG, 0, 0, &log_size);
- build_log = new char[log_size+1];
- clGetProgramBuildInfo(program, device[0],CL_PROGRAM_BUILD_LOG, log_size, build_log, &log_size);
- OutputDebugString(build_log);
- kernel = clCreateKernel(program, "vector_add_gpu", &error);
- res = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf1);
- res = clSetKernelArg(kernel, 1, sizeof(cl_mem), &buf2);
- res = clSetKernelArg(kernel, 2, sizeof(cl_mem), &buf3);
- int size = var_size;
- res = clSetKernelArg(kernel, 3, sizeof(cl_int), &size);
- std::cout << "start" << std::endl;
- cpuFunc();
- openClFunc();
- }
- void openClFunc()
- {
- Profile p("openClFunc");
- const size_t local_ws = 1;
- const size_t global_ws = var_size;
- cl_int res1 = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &global_ws, &local_ws, 0, 0, 0);
- //CL_SUCCESS
- // Reading back
- float* check = new float[var_size];
- cl_int res2 = clEnqueueReadBuffer(queue, buf3, CL_TRUE, 0, mem_size, check, 0, NULL, NULL);
- }
- void cpuFunc()
- {
- Profile p("cpuFunc");
- for(int i=0; i<var_size; i++)
- {
- outH3[i] = outH1[i] + outH2[i];
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement