Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <chrono>
#include <cstdlib>
#include <iostream>

#include <arrayfire.h>

using namespace std::chrono;
- int main(int argc, char ** argv)
- {
- af::setBackend(AF_BACKEND_CUDA);
- af::info();
- int iters = 1000;
- int n = 512;
- // init
- af::array A = af::randu(n, n);
- af::array B = af::randu(n, n);
- // warm up
- af::array C = af::matmul(A, B);
- C.eval();
- af::sync();
- high_resolution_clock::time_point t1 = high_resolution_clock::now();
- for (int t = 0; t < iters; ++t) {
- C = af::matmul(A, B);
- C.eval();
- }
- af::sync();
- high_resolution_clock::time_point t2 = high_resolution_clock::now();
- duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
- std::cout << "arrayfire - matmul: " << time_span.count() << std::endl;
- af::sync();
- t1 = high_resolution_clock::now();
- for (int t = 0; t < iters; ++t) {
- for (int i = 0; i < B.dims(1); ++i) {
- C = af::matmul(A, B(af::span, i));
- C.eval();
- }
- }
- af::sync();
- t2 = high_resolution_clock::now();
- time_span = duration_cast<duration<double>>(t2 - t1);
- std::cout << "arrayfire - sliced matmul - column major: " << time_span.count() << std::endl;
- af::sync();
- t1 = high_resolution_clock::now();
- for (int t = 0; t < iters; ++t) {
- for (int i = 0; i < B.dims(0); ++i) {
- C = af::matmul(B(i, af::span), A);
- C.eval();
- }
- }
- af::sync();
- t2 = high_resolution_clock::now();
- time_span = duration_cast<duration<double>>(t2 - t1);
- std::cout << "arrayfire - sliced matmul - row major: " << time_span.count() << std::endl;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement