makeSomethingOnGPU

                //Allocating buffer for OpenCL
        ByteBuffer pointsBuffer = Buffers.allocateBuffer(inClusters.getVectors().getVector());
        ByteBuffer clusterOutBuffer = Buffers.allocateBuffer(outClusters);

        //And writing
        Buffers.writeToBuffer(pointsBuffer, inClusters.getVectors().getVector());
        Buffers.writeToBuffer(clusterOutBuffer, outClusters);

        cl_mem clustersInMem = clCreateBuffer(gpu.getClContext(),
                CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                SizeofStruct.sizeof(CLVector.class) * k, Pointer.to(pointsBuffer), null);

        cl_mem clusterOutMem = clCreateBuffer(gpu.getClContext(),
                CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
                SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), null);

        clEnqueueWriteBuffer(gpu.getClCommandQueue(), clustersInMem, CL_TRUE, 0,
                SizeofStruct.sizeof(CLVector.class) * k, Pointer.to(pointsBuffer), 0, null, null);
        clEnqueueWriteBuffer(gpu.getClCommandQueue(), clusterOutMem, CL_TRUE, 0,
                SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), 0, null, null);

        // Create the program from the source code
        gpu.setClProgram(clCreateProgramWithSource(gpu.getClContext(), 1, new String[]{ kernelF }, null, null));

        // Build the program
        //The 4th argument allows to create a global variable (NOTE: no passing a global memory address)
        clBuildProgram(gpu.getClProgram(), 0, null, "-D SIZE="+attributes + " -D NVECTORS="+k
                                                +" -D BLOCK="+(int)Math.floor(1024 / attributes), null, null);

        // Create the clKernel
        gpu.setClKernel(clCreateKernel(gpu.getClProgram(), "compute", null));

        // Set the arguments for the clKernel
        clSetKernelArg(gpu.getClKernel(), 0, Sizeof.cl_mem, Pointer.to(clustersInMem));
        clSetKernelArg(gpu.getClKernel(), 1, Sizeof.cl_mem, Pointer.to(clusterOutMem));

        long global_work_size[] = new long[]{(long)(attributes * Math.floor(1024 / attributes))};
        long local_work_size[] = new long[]{attributes};

        // Execute the clKernel
        clEnqueueNDRangeKernel(gpu.getClCommandQueue(), gpu.getClKernel(), 1, null, global_work_size, local_work_size, 0, null, null);

        // Read back the data from to memory object to the particle buffer
        clEnqueueReadBuffer(gpu.getClCommandQueue(), clusterOutMem, true, 0,
                SizeofStruct.sizeof(CLVector.class) * outClusters.length, Pointer.to(clusterOutBuffer), 0 , null, null);

        clusterOutBuffer.rewind();
        Buffers.readFromBuffer(clusterOutBuffer, outClusters);

        clReleaseMemObject(clustersInMem);
        clReleaseMemObject(clusterOutMem);