Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
import static com.jogamp.opencl.CLMemory.Mem.*;
import static java.lang.Math.*;
import static java.lang.System.*;

import com.jogamp.opencl.CLBuffer;
import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLDevice;
import com.jogamp.opencl.CLKernel;
import com.jogamp.opencl.CLPlatform;
import com.jogamp.opencl.CLProgram;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLConnection;
import java.nio.FloatBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Random;
- /**
- * Hello Java OpenCL example. Adds all elements of buffer A to buffer B
- * and stores the result in buffer C.<br/>
- * Sample was inspired by the Nvidia VectorAdd example written in C/C++
- * which is bundled in the Nvidia OpenCL SDK.
- * @author Michael Bien
- */
- public class HelloJOCL {
- public static void main(String[] args) throws IOException {
- CLPlatform[] clPl = CLPlatform.listCLPlatforms();
- System.out.println("List Platforms");
- for(int i =0;i < clPl.length;i++){
- System.out.println(clPl[i].getName());
- System.out.println("Devices per platform :"+clPl[i].listCLDevices().length);
- System.out.println("Compute Devices on max flops device:"+
- clPl[i].getMaxFlopsDevice().getMaxComputeUnits());
- System.out.println(" Max flops device:"+
- clPl[i].getMaxFlopsDevice().getMaxClockFrequency());
- }
- // set up (uses default CLPlatform and creates context for all devices)
- CLContext context = CLContext.create(clPl[0]);
- out.println("created "+context);
- // always make sure to release the context under all circumstances
- // not needed for this particular sample but recommented
- try{
- // select fastest device
- CLDevice device = context.getMaxFlopsDevice();
- out.println("using "+device);
- // create command queue on device.
- CLCommandQueue queue = device.createCommandQueue();
- int elementCount = 1444477; // Length of arrays to process
- int localWorkSize = min(device.getMaxWorkGroupSize(), 256); // Local work size dimensions
- int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
- String fname = "vector_add.cl";
- File f = new File(fname);
- System.out.println("Current Path:"+f.getAbsolutePath());
- if(f.exists()){
- System.out.println("Found Kernel File");
- }else{
- System.out.println("Assuming running from IDE folder.");
- String fname2 = "./src/"+fname;
- f = new File(fname2);
- if(f.exists()){
- System.out.println("Found Kernel File");
- fname = "./src/"+fname;
- }else{
- System.out.println("Assuming running from Netbeans distribution folder.");
- fname2 = "../src/"+fname;
- f = new File(fname2);
- if(f.exists()){
- System.out.println("Found Kernel File");
- fname = "../src/"+fname;
- }else{
- System.out.println("Kernel File Not Found");}}
- }
- System.out.println("fname:"+fname);
- //InputStream vec = HelloJOCL.class.getResourceAsStream(fname);
- CLProgram program=null;
- try{
- String sourceCode = readFile(fname);
- // load sources, create and build program
- program = context.createProgram(sourceCode).build();
- System.out.println("prog: "+program);
- }catch (Exception e) {
- e.printStackTrace();
- }
- // A, B are input buffers, C is for the result
- CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY);
- CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY);
- CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY);
- out.println("used device memory: "
- + (clBufferA.getCLSize()+clBufferB.getCLSize()+clBufferC.getCLSize())/1000000 +"MB");
- float numA = 1.5f;
- float numB = 2.3f;
- System.out.println("Adding A: "+numA+"+ B: "+numB+" ="+(numA+numB));
- // fill input buffers with random numbers
- // (just to have test data; seed is fixed -> results will not change between runs).
- fillBuffer(clBufferA.getBuffer(), numA);
- fillBuffer(clBufferB.getBuffer(), numB);
- System.out.println("Make kernel.");
- // get a reference to the kernel function with the name 'VectorAdd'
- // and map the buffers to its input parameters.
- CLKernel kernel = program.createCLKernel("vector_add");
- kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount);
- System.out.println("Running kernel.");
- // asynchronous write of data to GPU device,
- // followed by blocking read to get the computed results back.
- long time = nanoTime();
- queue.putWriteBuffer(clBufferA, false)
- .putWriteBuffer(clBufferB, false)
- .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
- .putReadBuffer(clBufferC, true);
- time = nanoTime() - time;
- // print first few elements of the resulting buffer to the console.
- out.println("a+b=c results snapshot: ");
- for(int i = 0; i < 10; i++)
- out.print(clBufferC.getBuffer().get() + ", ");
- out.println("...; " + clBufferC.getBuffer().remaining() + " more");
- out.println("computation took: "+(time/1000000)+"ms");
- }finally{
- // cleanup all resources associated with this context.
- context.release();
- }
- }
- private static void fillBuffer(FloatBuffer buffer, float setf) {
- while(buffer.remaining() != 0)
- buffer.put(setf);
- buffer.rewind();
- }
- private static int roundUp(int groupSize, int globalSize) {
- int r = globalSize % groupSize;
- if (r == 0) {
- return globalSize;
- } else {
- return globalSize + groupSize - r;
- }
- }
- private static String readFile(String filename) {
- File f = new File(filename);
- try {
- byte[] bytes = Files.readAllBytes(f.toPath());
- return new String(bytes,"UTF-8");
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- return "";
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement