Recent Posts
XML | 6 sec ago
None | 22 sec ago
None | 37 sec ago
VB.NET | 48 sec ago
None | 53 sec ago
None | 1 min ago
None | 1 min ago
None | 2 min ago
None | 2 min ago
None | 2 min ago
Sitereport
Find cool info about any domain on the internet.
visit sitereport
Free Subdomains
Want a pastebin.com sub-domain for your community?
learn more...
What is pastebin?
Pastebin is a website that hosts all your text & code on dedicated servers for easy sharing.
learn more...
Learn a little bit about the new Pastebin.com on our help page. hide message
By altaic on the 9th of Nov 2009 11:48:04 AM Download | Raw | Embed | Report
  1. int exec_kernel(int ngrid,int natom,int ngadj,int naadj,
  2.                                 float *ax,float *ay,float *az,
  3.                                 float *gx,float *gy,float *gz,
  4.                                 float *charge,float *size,float xkappa,
  5.                                 float pre1,float *val,int itmax,const char * filename){
  6.  
  7.         cl_context                      context;
  8.        
  9.     cl_command_queue    cmd_queue;
  10.     cl_device_id                devices[2];
  11.         cl_uint                         count;
  12.         cl_uint                         device_index;
  13.  
  14.     cl_int                              err;
  15.        
  16.         // Connect to a compute device
  17.         err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 2, devices, &count);
  18.        
  19.         printf("Found %d GPU(s)...\n", count);
  20. //      for (int i=0; i < count; i++) { device_stats(devices[i]); }
  21.        
  22.         // choose the secondary gpu if there is one; the primary gpu is likely being used for the display
  23.         device_index = (count > 1)? 1 : 0;
  24.        
  25.     size_t returned_size = 0;
  26.     cl_char vendor_name[1024] = {0};
  27.     cl_char device_name[1024] = {0};
  28.     err = clGetDeviceInfo(devices[device_index], CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
  29.     err|= clGetDeviceInfo(devices[device_index], CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
  30.        
  31.     printf("Connecting to %s %s...\n", vendor_name, device_name);
  32. //      device_stats(devices[device_index]);
  33.        
  34.     // Read the program
  35.         printf("Loading program '%s'\n\n", filename);
  36.         char *program_source = load_program_source(filename);
  37.    
  38.         //Create the context and command queue
  39.         context = clCreateContext(0, 1, &devices[device_index], NULL, NULL, &err);
  40.         cmd_queue = clCreateCommandQueue(context, devices[device_index], 0, NULL);
  41.        
  42.         //Allocate memory for programs and kernels
  43.         cl_program program;
  44.     cl_kernel kernel;
  45.        
  46.         //Create program from .cl file
  47.         program = clCreateProgramWithSource(context,1, (const char**)&program_source, NULL, &err);
  48.        
  49.     // build the program (compile it)
  50.     err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  51.         char build[2048];
  52.         clGetProgramBuildInfo(program, devices[device_index], CL_PROGRAM_BUILD_LOG, 2048, build, NULL);
  53.         printf("Build Log:\n%s\n",build);
  54.        
  55.         // create the kernel
  56.     kernel = clCreateKernel(program, "mdh", &err);
  57.        
  58.         uint64_t mbeg, mend;
  59.         double cl_alloc, cl_enqueue, cl_read;
  60.        
  61.         size_t atom_buffer_size = sizeof(float) * natom;
  62.         size_t grid_buffer_size = sizeof(float) * ngrid;
  63.         size_t gadj_buffer_size = sizeof(float) * ngadj;
  64.        
  65.         mbeg = mach_absolute_time();
  66.         //Allocate memory and queue it to be written to the device
  67.         cl_mem ax_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, atom_buffer_size, NULL, NULL);
  68.         err = clEnqueueWriteBuffer(cmd_queue, ax_mem, CL_TRUE, 0, atom_buffer_size, (void*)ax, 0, NULL, NULL);
  69.        
  70.         cl_mem ay_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, atom_buffer_size, NULL, NULL);
  71.         err = clEnqueueWriteBuffer(cmd_queue, ay_mem, CL_TRUE, 0, atom_buffer_size, (void*)ay, 0, NULL, NULL);
  72.        
  73.         cl_mem az_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, atom_buffer_size, NULL, NULL);
  74.         err = clEnqueueWriteBuffer(cmd_queue, az_mem, CL_TRUE, 0, atom_buffer_size, (void*)az, 0, NULL, NULL);
  75.        
  76.         cl_mem charge_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, atom_buffer_size, NULL, NULL);
  77.         err = clEnqueueWriteBuffer(cmd_queue, charge_mem, CL_TRUE, 0, atom_buffer_size, (void*)charge, 0, NULL, NULL);
  78.        
  79.         cl_mem size_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, atom_buffer_size, NULL, NULL);
  80.         err = clEnqueueWriteBuffer(cmd_queue, size_mem, CL_TRUE, 0, atom_buffer_size, (void*)size, 0, NULL, NULL);
  81.        
  82.         cl_mem gx_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, gadj_buffer_size, NULL, NULL);
  83.         err = clEnqueueWriteBuffer(cmd_queue, gx_mem, CL_TRUE, 0, gadj_buffer_size, (void*)gx, 0, NULL, NULL);
  84.        
  85.         cl_mem gy_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, gadj_buffer_size, NULL, NULL);
  86.         err = clEnqueueWriteBuffer(cmd_queue, gy_mem, CL_TRUE, 0, gadj_buffer_size, (void*)gy, 0, NULL, NULL);
  87.        
  88.         cl_mem gz_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, gadj_buffer_size, NULL, NULL);
  89.         err = clEnqueueWriteBuffer(cmd_queue, gz_mem, CL_TRUE, 0, gadj_buffer_size, (void*)gz, 0, NULL, NULL);
  90.        
  91.         cl_mem val_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, gadj_buffer_size, NULL, NULL);
  92.         err = clEnqueueWriteBuffer(cmd_queue, val_mem, CL_TRUE, 0, gadj_buffer_size, (void*)val, 0, NULL, NULL);
  93.        
  94.         //Push the data out to device
  95.         clFinish(cmd_queue);
  96.        
  97.         mend = mach_absolute_time();
  98.         cl_alloc = machcore(mend, mbeg);
  99.        
  100.         // set work-item dimensions
  101.         size_t global_work_size, local_work_size, shared_size;
  102.         global_work_size = ngadj;
  103.         local_work_size = 64;
  104.         shared_size = (5 * local_work_size) * sizeof(float);
  105.        
  106.         //Set kernel arguments
  107.         err  = clSetKernelArg(kernel,  0, sizeof(cl_mem), &ax_mem);
  108.         err |= clSetKernelArg(kernel,  1, sizeof(cl_mem), &ay_mem);
  109.         err |= clSetKernelArg(kernel,  2, sizeof(cl_mem), &az_mem);
  110.         err |= clSetKernelArg(kernel,  3, sizeof(cl_mem), &charge_mem);
  111.         err |= clSetKernelArg(kernel,  4, sizeof(cl_mem), &size_mem);
  112.         err |= clSetKernelArg(kernel,  5, sizeof(cl_mem), &gx_mem);
  113.         err |= clSetKernelArg(kernel,  6, sizeof(cl_mem), &gy_mem);
  114.         err |= clSetKernelArg(kernel,  7, sizeof(cl_mem), &gz_mem);
  115.         err |= clSetKernelArg(kernel,  8, sizeof(float), &pre1);
  116.         err |= clSetKernelArg(kernel,  9, sizeof(float), &xkappa);
  117.         err |= clSetKernelArg(kernel, 10, sizeof(cl_mem), &val_mem);
  118.         err |= clSetKernelArg(kernel, 11, sizeof(int), &natom);
  119.         err |= clSetKernelArg(kernel, 12, shared_size, NULL);
  120.        
  121.         size_t thread_size;
  122.         clGetKernelWorkGroupInfo(kernel,devices[device_index],CL_KERNEL_WORK_GROUP_SIZE,
  123.                                                          sizeof(size_t),&thread_size,NULL);
  124.         printf("Recommended Size: %lu\n",thread_size);
  125.        
  126.         mbeg = mach_absolute_time();
  127.         //Queue up the kernels itmax times
  128.         for(int i=0;i<itmax;i++)
  129.                 err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, &global_work_size,
  130.                                                                          &local_work_size, 0, NULL, NULL);
  131.        
  132.         //Finish the calculation
  133.         clFinish(cmd_queue);
  134.         mend = mach_absolute_time();
  135.         cl_enqueue = machcore(mend, mbeg);
  136.        
  137.         mbeg = mach_absolute_time();
  138.         // read output image
  139.         err = clEnqueueReadBuffer(cmd_queue, val_mem, CL_TRUE, 0, grid_buffer_size, val, 0, NULL, NULL);
  140.         clFinish(cmd_queue);
  141.         mend = mach_absolute_time();
  142.         cl_read = machcore(mend, mbeg);
  143.        
  144.         printf("Allocation: %1.12g Enqueue: %1.12g Read: %1.12g\n",cl_alloc,cl_enqueue,cl_read);
  145.         print_total(val, ngrid);
  146.  
  147.     // release kernel, program, and memory objects
  148.         clReleaseKernel(kernel);
  149.     clReleaseProgram(program);
  150.     clReleaseCommandQueue(cmd_queue);
  151.     clReleaseContext(context);
  152.        
  153.         clReleaseMemObject(ax_mem);
  154.         clReleaseMemObject(ay_mem);
  155.         clReleaseMemObject(az_mem);
  156.         clReleaseMemObject(charge_mem);
  157.         clReleaseMemObject(size_mem);
  158.        
  159.         clReleaseMemObject(gx_mem);
  160.         clReleaseMemObject(gy_mem);
  161.         clReleaseMemObject(gz_mem);
  162.        
  163.         clReleaseMemObject(val_mem);
  164.        
  165.         return CL_SUCCESS;
  166. }
Submit a correction or amendment below. Make A New Post
To highlight particular lines, prefix each line with @h@
Syntax highlighting:
Post expiration:
Post exposure:
Name / Title:
Email: