Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <fstream>
- #include <CL/cl.hpp>
- #include <chrono>
- #define N 10000 // Matrix size
// Fills the input matrices with constant test data: every element of A is
// set to 1.0f and every element of B is set to 2.0f.
//
// The vectors must already be sized by the caller; this function never
// resizes them. Each vector is filled over its own length, so the two may
// differ in size and empty vectors are a no-op.
//
// A: storage for matrix A, overwritten in place.
// B: storage for matrix B, overwritten in place.
void initializeMatrices(std::vector<float>& A, std::vector<float>& B) {
    // Iterate over the elements each vector actually owns rather than a
    // fixed element count, so a shorter-than-expected vector can never be
    // written out of bounds.
    for (float& element : A) {
        element = 1.0f;
    }
    for (float& element : B) {
        element = 2.0f;
    }
}
- int main() {
- // Initialize OpenCL context, command queue, and program
- cl::Platform platform = cl::Platform::getDefault();
- cl::Device device = cl::Device::getDefault();
- cl::Context context(device);
- cl::CommandQueue queue(context, device);
- // Load OpenCL source code from file
- std::ifstream sourceFile("matrix_multiplication.cl");
- std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>()));
- // Create program from source code
- cl::Program::Sources sources(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));
- cl::Program program(context, sources);
- // Build program for the device
- program.build();
- // Define input matrices A and B
- std::vector<float> A(N * N);
- std::vector<float> B(N * N);
- initializeMatrices(A, B);
- // Allocate memory for output matrix C
- std::vector<float> C(N * N, 0.0f);
- // Create buffer objects for input and output matrices
- cl::Buffer bufferA(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * A.size(), A.data());
- cl::Buffer bufferB(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * B.size(), B.data());
- cl::Buffer bufferC(context, CL_MEM_WRITE_ONLY, sizeof(float) * C.size());
- // Create kernel object
- cl::Kernel kernel(program, "matrixMultiplication");
- kernel.setArg(0, bufferA);
- kernel.setArg(1, bufferB);
- kernel.setArg(2, bufferC);
- kernel.setArg(3, N);
- // Execute kernel on CPU
- auto startCPU = std::chrono::high_resolution_clock::now();
- queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(N, N));
- queue.finish();
- auto stopCPU = std::chrono::high_resolution_clock::now();
- auto durationCPU = std::chrono::duration_cast<std::chrono::milliseconds>(stopCPU - startCPU);
- // Read output matrix C from device to host
- queue.enqueueReadBuffer(bufferC, CL_TRUE, 0, sizeof(float) * C.size(), C.data());
- // Measure performance on CPU
- double tflopsCPU = (2.0 * N * N * N) / (static_cast<double>(durationCPU.count()) * 1e6);
- std::cout << "Performance on CPU: " << tflopsCPU << " TFLOPS" << std::endl;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement