Advertisement
Guest User

Untitled

a guest
Jan 3rd, 2012
439
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 5.82 KB | None | 0 0
  1. /*
  2.  * JOCL - Java bindings for OpenCL
  3.  *
  4.  * Copyright 2009 Marco Hutter - http://www.jocl.org/
  5.  */
  6.  
  7. package jocltest;
  8.  
  9. import static org.jocl.CL.*;
  10.  
  11. import org.jocl.*;
  12.  
  13. /**
  14.  * A small JOCL sample.
  15.  */
  16. public class JOCLSample
  17. {
  18.     /**
  19.      * The source code of the OpenCL program to execute
  20.      */
  21.     static int n = 1;
  22.     static int tot = 20000000;
  23.     static int m = tot/n;
  24.    
  25.     private static String programSource =
  26.         "__kernel void "+
  27.         "sampleKernel(__global const float *a,"+
  28.         "             __global const float *b,"+
  29.         "             __global float *c)"+
  30.         "{"+
  31.         "    int gid = get_global_id(0);"+
  32.         "    float res = 0.0f;"+
  33.         "    for (int i=0; i<"+m+"; i++) {"+
  34.         "        res+=tanh(0.5596853f+i*0.001f+a[gid]);"+
  35.         "    }"+
  36.         "    c[gid]=res;"+
  37.         "}";
  38.    
  39.  
  40.     /**
  41.      * The entry point of this sample
  42.      *
  43.      * @param args Not used
  44.      */
  45.     public static void main(String args[])
  46.     {
  47.         // Create input- and output data
  48.         float srcArrayA[] = new float[n];
  49.         float srcArrayB[] = new float[n];
  50.         float dstArray[] = new float[n];
  51.         for (int i=0; i<n; i++)
  52.         {
  53.             srcArrayA[i] = i;
  54.             srcArrayB[i] = i;
  55.         }
  56.         long timestart = System.currentTimeMillis();
  57.         Pointer srcA = Pointer.to(srcArrayA);
  58.         Pointer srcB = Pointer.to(srcArrayB);
  59.         Pointer dst = Pointer.to(dstArray);
  60.  
  61.         // The platform, device type and device number
  62.         // that will be used
  63.         final int platformIndex = 0;
  64.         final long deviceType = CL_DEVICE_TYPE_ALL;
  65.         final int deviceIndex = 0;
  66.  
  67.         // Enable exceptions and subsequently omit error checks in this sample
  68.         CL.setExceptionsEnabled(true);
  69.  
  70.         // Obtain the number of platforms
  71.         int numPlatformsArray[] = new int[1];
  72.         clGetPlatformIDs(0, null, numPlatformsArray);
  73.         int numPlatforms = numPlatformsArray[0];
  74.  
  75.         // Obtain a platform ID
  76.         cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
  77.         clGetPlatformIDs(platforms.length, platforms, null);
  78.         cl_platform_id platform = platforms[platformIndex];
  79.  
  80.         // Initialize the context properties
  81.         cl_context_properties contextProperties = new cl_context_properties();
  82.         contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
  83.        
  84.         // Obtain the number of devices for the platform
  85.         int numDevicesArray[] = new int[1];
  86.         clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
  87.         int numDevices = numDevicesArray[0];
  88.        
  89.         // Obtain a device ID
  90.         cl_device_id devices[] = new cl_device_id[numDevices];
  91.         clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
  92.         cl_device_id device = devices[deviceIndex];
  93.  
  94.         // Create a context for the selected device
  95.         cl_context context = clCreateContext(
  96.             contextProperties, 1, new cl_device_id[]{device},
  97.             null, null, null);
  98.        
  99.         // Create a command-queue for the selected device
  100.         cl_command_queue commandQueue =
  101.             clCreateCommandQueue(context, device, 0, null);
  102.  
  103.         // Allocate the memory objects for the input- and output data
  104.         cl_mem memObjects[] = new cl_mem[3];
  105.         memObjects[0] = clCreateBuffer(context,
  106.             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
  107.             Sizeof.cl_float * n, srcA, null);
  108.         memObjects[1] = clCreateBuffer(context,
  109.             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
  110.             Sizeof.cl_float * n, srcB, null);
  111.         memObjects[2] = clCreateBuffer(context,
  112.             CL_MEM_READ_WRITE,
  113.             Sizeof.cl_float * n, null, null);
  114.        
  115.         // Create the program from the source code
  116.         cl_program program = clCreateProgramWithSource(context,
  117.             1, new String[]{ programSource }, null, null);
  118.        
  119.         // Build the program
  120.         clBuildProgram(program, 0, null, null, null, null);
  121.        
  122.         // Create the kernel
  123.         cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
  124.        
  125.         // Set the arguments for the kernel
  126.         clSetKernelArg(kernel, 0,
  127.             Sizeof.cl_mem, Pointer.to(memObjects[0]));
  128.         clSetKernelArg(kernel, 1,
  129.             Sizeof.cl_mem, Pointer.to(memObjects[1]));
  130.         clSetKernelArg(kernel, 2,
  131.             Sizeof.cl_mem, Pointer.to(memObjects[2]));
  132.        
  133.         // Set the work-item dimensions
  134.         long global_work_size[] = new long[]{n};
  135.         long local_work_size[] = new long[]{1};
  136.        
  137.         // Execute the kernel
  138.         clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
  139.             global_work_size, local_work_size, 0, null, null);
  140.        
  141.         // Read the output data
  142.         clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
  143.             n * Sizeof.cl_float, dst, 0, null, null);
  144.        
  145.         // Release kernel, program, and memory objects
  146.         clReleaseMemObject(memObjects[0]);
  147.         clReleaseMemObject(memObjects[1]);
  148.         clReleaseMemObject(memObjects[2]);
  149.         clReleaseKernel(kernel);
  150.         clReleaseProgram(program);
  151.         clReleaseCommandQueue(commandQueue);
  152.         clReleaseContext(context);
  153.        
  154.         // Verify the result
  155.         long timemid = System.currentTimeMillis();
  156.         //System.out.println(n);
  157.         double res = 0;
  158.         for (int i=0; i<n; i++)
  159.         {
  160.             for (int k=0; k<m; k++) res+=Math.tan(0.5596853+i*0.001+srcArrayA[i]);
  161.         }
  162.         long timeend = System.currentTimeMillis();
  163.         System.out.println("res="+res);
  164.         System.out.println("GPU time: "+(timemid-timestart)+" ms");
  165.         System.out.println("CPU time: "+(timeend-timemid)+" ms");
  166.     }
  167. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement