Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define _CRT_SECURE_NO_WARNINGS
- #include <cstdio>
- #include <cstdlib>
- #ifdef __APPLE__
- #include <OpenCL/opencl.h>
- #else
- #include <CL/cl.h>
- #endif
- #define CHECK_ERROR(X) __checkError(__FILE__, __LINE__, X)
- inline void __checkError(const char *file, int line, cl_int error) {
- if (error != CL_SUCCESS) {
- printf("error %i in file %s, line %i\n", error, file, line);
- exit(EXIT_FAILURE);
- }
- }
- void helloOpenCL(const int *in, int *out, int count) {
- cl_device_id device_id = NULL;
- cl_platform_id platform_id = NULL;
- cl_uint ret_num_devices;
- cl_uint ret_num_platforms;
- cl_int err = CL_SUCCESS;
- const int MAX_SOURCE_SIZE = 1e8;
- FILE *fp;
- char fileName[] = "./kernel.cl";
- char *source_str;
- size_t source_size;
- /* Load the source code containing the kernel*/
- fp = fopen(fileName, "r");
- if (!fp) {
- fprintf(stderr, "Failed to load kernel.\n");
- exit(1);
- }
- source_str = new char[MAX_SOURCE_SIZE];
- source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
- fclose(fp);
- /* Get Platform and Device Info */
- cl_platform_id *platforms = new cl_platform_id[2];
- err = clGetPlatformIDs(2, platforms, &ret_num_platforms);
- CHECK_ERROR(err);
- err = clGetDeviceIDs(platforms[1], CL_DEVICE_TYPE_GPU, 1, &device_id,
- &ret_num_devices);
- CHECK_ERROR(err);
- char vendor_name[1024];
- err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name),
- &vendor_name, NULL);
- printf("%s ", vendor_name);
- CHECK_ERROR(err);
- /* Create OpenCL context */
- cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
- CHECK_ERROR(err);
- /* Create Command Queue */
- cl_command_queue command_queue =
- clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err);
- CHECK_ERROR(err);
- /* Create Kernel Program from the source */
- cl_program program =
- clCreateProgramWithSource(context, 1, (const char **)&source_str,
- (const size_t *)&source_size, &err);
- CHECK_ERROR(err);
- /* Build Kernel Program */
- err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
- if (err != CL_SUCCESS) {
- size_t len;
- char buffer[2048];
- printf("Error: Failed to build program executable!\n");
- clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
- sizeof(buffer), buffer, &len);
- printf("%s\n", buffer);
- exit(EXIT_FAILURE);
- }
- /* Create OpenCL Kernel */
- cl_kernel kernel = clCreateKernel(program, "helloOpenCLKernel", &err);
- CHECK_ERROR(err);
- /* Create Memory Buffer */
- cl_mem input = clCreateBuffer(context, CL_MEM_READ_ONLY,
- count * sizeof(int), NULL, &err);
- CHECK_ERROR(err);
- cl_mem output = clCreateBuffer(context, CL_MEM_READ_WRITE,
- count * sizeof(int), NULL, &err);
- CHECK_ERROR(err);
- // Copy data to the device memory
- err = clEnqueueWriteBuffer(command_queue, input, CL_TRUE, 0,
- sizeof(int) * count, in, 0, NULL, NULL);
- CHECK_ERROR(err);
- /* Set OpenCL Kernel Parameters */
- err = 0;
- err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
- err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
- err |= clSetKernelArg(kernel, 2, sizeof(int), &count);
- CHECK_ERROR(err);
- size_t global_item_size = count;
- size_t local_item_size = 512;
- /* Execute OpenCL kernel as data parallel */
- cl_event prof_event;
- err = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size,
- &local_item_size, 0, NULL, &prof_event);
- CHECK_ERROR(err);
- // Wait for the commands to get serviced before reading back results
- err = clFinish(command_queue);
- CHECK_ERROR(err);
- // Calculate time taken by the kernel:
- cl_ulong ev_start_time = (cl_ulong)0;
- cl_ulong ev_end_time = (cl_ulong)0;
- err = clWaitForEvents(1, &prof_event);
- err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ev_start_time, NULL);
- err |= clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &ev_end_time, NULL);
- float run_time_gpu = (float)(ev_end_time - ev_start_time) / 1000000.f;
- printf("run time = %0.3fms\n", run_time_gpu);
- CHECK_ERROR(err);
- //(6) Read back the results from the device to verify the output
- err = clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0,
- sizeof(int) * count, out, 0, NULL, NULL);
- CHECK_ERROR(err);
- int counter = 0;
- for (int i = 1; i < count; i++) {
- if (out[i] == in[i] - in[i - 1]) {
- counter++;
- }
- }
- printf("%d %d\n", count, counter);
- /* Finalization */
- err = clFlush(command_queue);
- err |= clReleaseMemObject(input);
- err | clReleaseMemObject(output);
- err |= clReleaseKernel(kernel);
- err |= clReleaseProgram(program);
- err |= clReleaseCommandQueue(command_queue);
- err |= clReleaseContext(context);
- CHECK_ERROR(err);
- delete[] source_str;
- }
- #include <ctime>
- int main() {
- const int sz = 1024*1024*16;
- int *input = new int[sz];
- int *output = new int[sz];
- srand(time(NULL));
- for (int i = 1; i <= sz; i++) {
- input[i - 1] = i + i * (i & 2);
- //printf("input[%d] = %f\n", i - 1, input[i - 1]);
- }
- helloOpenCL(input, output, sz);
- delete[] input;
- delete[] output;
- return 0;
- }
- //// kernel.cl
- #define BLOCK_SIZE 512
- #define global __global
- #define local __local
- /*
- const int globalId = get_global_id(0);
- if (globalId >= count) {
- return;
- }
- const int localId = get_local_id(0);
- local int sharedData[BLOCK_SIZE]; //compile-time vs run-time
- sharedData[localId] = in[globalId];
- barrier(CLK_LOCAL_MEM_FENCE);
- if (localId != 0) {
- out[globalId] = sharedData[localId] - sharedData[localId - 1];
- }
- else if (globalId != 0) {
- out[globalId] = sharedData[localId] - in[globalId - 1];
- }
- const int globalId = get_global_id(0);
- if (globalId > 0) {
- int curr = in[globalId];
- int prev = in[globalId - 1];
- out[globalId] = curr - prev;
- }
- */
/*
 * Adjacent difference: for every i in [1, count) writes
 *     out[i] = in[i] - in[i - 1]
 * Element out[0] is never written.
 *
 *   in    - input values, at least `count` elements
 *   out   - output values, at least `count` elements
 *   count - number of valid elements; work-items with a global id >= count
 *           produce no output, so the global size may be padded
 *
 * Each work-group stages its inputs in local memory so that neighbouring
 * work-items read the predecessor from there; only the first work-item of a
 * group fetches its predecessor from global memory. The local array holds
 * BLOCK_SIZE ints, so the work-group size must not exceed BLOCK_SIZE.
 */
__kernel void helloOpenCLKernel(__global const int *in, __global int *out,
                                int count) {
    const int globalId = get_global_id(0);
    const int localId = get_local_id(0);
    const bool inRange = globalId < count;

    __local int sharedData[BLOCK_SIZE]; /* size fixed at compile time */
    if (inRange) {
        sharedData[localId] = in[globalId];
    }

    /* Every work-item of the group must reach the barrier, so out-of-range
       work-items may only leave after it, never before. */
    barrier(CLK_LOCAL_MEM_FENCE);

    if (!inRange || globalId == 0) {
        return;
    }

    if (localId != 0) {
        out[globalId] = sharedData[localId] - sharedData[localId - 1];
    } else {
        /* Predecessor belongs to the previous work-group: read it from global memory. */
        out[globalId] = sharedData[localId] - in[globalId - 1];
    }
}
Advertisement
Add Comment
Please, Sign In to add comment