Advertisement
cataleena

MatrixMul-OpenCL-CPU

May 18th, 2024
497
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.53 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <fstream>
  4. #include <CL/cl.hpp>
  5. #include <chrono>
  6.  
  7. #define N 10000 // Matrix dimension: A, B and C each hold N * N floats in a flat std::vector
  8.  
  /**
   * Fills matrix A with 1.0f and matrix B with 2.0f.
   *
   * Every element of each vector is overwritten, so the caller must size the
   * vectors beforehand. The bounds are taken from the vectors themselves
   * rather than from the global N, so a vector shorter than N * N is never
   * written past its end.
   *
   * @param A vector that receives 1.0f in every element
   * @param B vector that receives 2.0f in every element
   */
  void initializeMatrices(std::vector<float>& A, std::vector<float>& B) {
      for (float& element : A) {
          element = 1.0f;
      }
      for (float& element : B) {
          element = 2.0f;
      }
  }
  16.  
  17. int main() {
  18.     // Initialize OpenCL context, command queue, and program
  19.     cl::Platform platform = cl::Platform::getDefault();
  20.     cl::Device device = cl::Device::getDefault();
  21.     cl::Context context(device);
  22.     cl::CommandQueue queue(context, device);
  23.  
  24.     // Load OpenCL source code from file
  25.     std::ifstream sourceFile("matrix_multiplication.cl");
  26.     std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>()));
  27.  
  28.     // Create program from source code
  29.     cl::Program::Sources sources(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));
  30.     cl::Program program(context, sources);
  31.  
  32.     // Build program for the device
  33.     program.build();
  34.  
  35.     // Define input matrices A and B
  36.     std::vector<float> A(N * N);
  37.     std::vector<float> B(N * N);
  38.     initializeMatrices(A, B);
  39.  
  40.     // Allocate memory for output matrix C
  41.     std::vector<float> C(N * N, 0.0f);
  42.  
  43.     // Create buffer objects for input and output matrices
  44.     cl::Buffer bufferA(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * A.size(), A.data());
  45.     cl::Buffer bufferB(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * B.size(), B.data());
  46.     cl::Buffer bufferC(context, CL_MEM_WRITE_ONLY, sizeof(float) * C.size());
  47.  
  48.     // Create kernel object
  49.     cl::Kernel kernel(program, "matrixMultiplication");
  50.     kernel.setArg(0, bufferA);
  51.     kernel.setArg(1, bufferB);
  52.     kernel.setArg(2, bufferC);
  53.     kernel.setArg(3, N);
  54.  
  55.     // Execute kernel on CPU
  56.     auto startCPU = std::chrono::high_resolution_clock::now();
  57.     queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(N, N));
  58.     queue.finish();
  59.     auto stopCPU = std::chrono::high_resolution_clock::now();
  60.     auto durationCPU = std::chrono::duration_cast<std::chrono::milliseconds>(stopCPU - startCPU);
  61.  
  62.     // Read output matrix C from device to host
  63.     queue.enqueueReadBuffer(bufferC, CL_TRUE, 0, sizeof(float) * C.size(), C.data());
  64.  
  65.     // Measure performance on CPU
  66.     double tflopsCPU = (2.0 * N * N * N) / (static_cast<double>(durationCPU.count()) * 1e6);
  67.  
  68.     std::cout << "Performance on CPU: " << tflopsCPU << " TFLOPS" << std::endl;
  69.  
  70.     return 0;
  71. }
  72.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement