manLiu

makeSomethingOnGPU

May 15th, 2014
145
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.                 // Run the `compute` OpenCL kernel over the input vectors and read the result back into outClusters. First: allocate one host ByteBuffer per array.
  2.         ByteBuffer pointsBuffer = Buffers.allocateBuffer(inClusters.getVectors().getVector());
  3.         ByteBuffer clusterOutBuffer = Buffers.allocateBuffer(outClusters);
  4.  
  5.         // Copy the input vectors and the current outClusters contents into those host buffers (presumably in CLVector struct layout; confirm against Buffers.writeToBuffer)
  6.         Buffers.writeToBuffer(pointsBuffer, inClusters.getVectors().getVector());
  7.         Buffers.writeToBuffer(clusterOutBuffer, outClusters);
  8.         // Wrap both host buffers as OpenCL memory objects: the input is read-only for the kernel, the output is read-write; CL_MEM_USE_HOST_PTR asks OpenCL to use the given host buffer as the object's storage
  9.         cl_mem clustersInMem = clCreateBuffer(gpu.getClContext(),
  10.                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
  11.                 SizeofStruct.sizeof(CLVector.class) * k, Pointer.to(pointsBuffer), null);
  12.         // NOTE(review): the input object above is sized for k elements while pointsBuffer was allocated from inClusters.getVectors().getVector(); confirm that array holds exactly k entries
  13.         cl_mem clusterOutMem = clCreateBuffer(gpu.getClContext(),
  14.                 CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
  15.                 SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), null);
  16.         // Blocking writes (CL_TRUE) of both host buffers into their memory objects. NOTE(review): the objects already use these same buffers via CL_MEM_USE_HOST_PTR, so these copies look redundant; confirm before removing
  17.         clEnqueueWriteBuffer(gpu.getClCommandQueue(), clustersInMem, CL_TRUE, 0,
  18.                 SizeofStruct.sizeof(CLVector.class) * k, Pointer.to(pointsBuffer), 0, null, null);
  19.         clEnqueueWriteBuffer(gpu.getClCommandQueue(), clusterOutMem, CL_TRUE, 0,
  20.                 SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), 0, null, null);
  21.         // NOTE(review): none of the cl* status results are checked and every errcode_ret argument is null; confirm that JOCL exceptions are enabled elsewhere, otherwise failures pass silently
  22.         // Create the program from the kernel source string held in kernelF and store it on gpu
  23.         gpu.setClProgram(clCreateProgramWithSource(gpu.getClContext(), 1, new String[]{ kernelF }, null, null));
  24.  
  25.         // Build the program
  26.         // The 4th argument is the compiler options string: each -D flag defines a preprocessor macro (SIZE, NVECTORS, BLOCK) visible to the kernel source as a compile-time constant, not as a global memory address
  27.         clBuildProgram(gpu.getClProgram(), 0, null, "-D SIZE="+attributes + " -D NVECTORS="+k
  28.                                                 +" -D BLOCK="+(int)Math.floor(1024 / attributes), null, null);
  29.         // NOTE(review): attributes appears to be an integral type (it initialises a long[] below); if so, 1024 / attributes is already integer division, Math.floor is a no-op, and attributes == 0 throws ArithmeticException. Confirm
  30.         // Look up the kernel named compute in the built program and store it on gpu
  31.         gpu.setClKernel(clCreateKernel(gpu.getClProgram(), "compute", null));
  32.  
  33.         // Bind the kernel arguments: 0 = input memory object, 1 = output memory object
  34.         clSetKernelArg(gpu.getClKernel(), 0, Sizeof.cl_mem, Pointer.to(clustersInMem));
  35.         clSetKernelArg(gpu.getClKernel(), 1, Sizeof.cl_mem, Pointer.to(clusterOutMem));
  36.         // 1-D launch geometry: work-groups of `attributes` items, global size attributes * floor(1024 / attributes), i.e. the same count passed to the kernel as BLOCK work-groups. The 1024 is presumably a device work-item limit; confirm
  37.         long global_work_size[] = new long[]{(long)(attributes * Math.floor(1024 / attributes))};
  38.         long local_work_size[] = new long[]{attributes};
  39.  
  40.         // Enqueue the kernel over the 1-D range (work_dim = 1, no global offset, no wait list, no completion event)
  41.         clEnqueueNDRangeKernel(gpu.getClCommandQueue(), gpu.getClKernel(), 1, null, global_work_size, local_work_size, 0, null, null);
  42.        
  43.         // Blocking read (third argument true) of the results from clusterOutMem back into clusterOutBuffer
  44.         clEnqueueReadBuffer(gpu.getClCommandQueue(), clusterOutMem, true, 0,
  45.                 SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), 0 , null, null);
  46.         // Reset the buffer position to 0 so Buffers.readFromBuffer starts at the beginning when filling outClusters
  47.         clusterOutBuffer.rewind();
  48.         Buffers.readFromBuffer(clusterOutBuffer, outClusters);
  49.         // Release both memory objects. NOTE(review): the program and kernel created above are only stored on gpu and are not released here; confirm gpu releases them, otherwise each run leaks one of each
  50.         clReleaseMemObject(clustersInMem);
  51.         clReleaseMemObject(clusterOutMem);
RAW Paste Data