Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
!> Benchmark of a single-precision (M x K) * (K x N) matrix product computed
!> four ways: an OpenACC kernels region, a CUDA Fortran CUF kernel, the sgemm
!> provided through the cublas module, and the matmul intrinsic.
!>
!> Each section prints the centre element of its result, C(M/2,N/2), followed
!> by the elapsed wall-clock time in seconds measured with system_clock.
program test
  use cublas
  use iso_fortran_env, only: int64, real64
  implicit none

  integer, parameter :: M = 10000, N = 10000, K = 1000
  integer(int64) :: t0, t1, rate
  integer :: i, j, l
  real :: tmp
  ! With 4-byte default reals the operands take about 480 MB on the host
  ! (C alone is 400 MB) and the same again on the device, so they are
  ! allocated dynamically rather than declared as fixed-size program storage.
  real, allocatable :: A(:,:), B(:,:), C(:,:)
  real, allocatable, device :: Ad(:,:), Bd(:,:), Cd(:,:)

  allocate(A(M,K), B(K,N), C(M,N))
  allocate(Ad(M,K), Bd(K,N), Cd(M,N))

  call random_number(A)
  call random_number(B)

  ! Host-to-device copies of the inputs; these happen before any timer starts.
  Ad = A
  Bd = B

  ! The tick rate is queried once and reused for every measurement.
  call system_clock(count_rate=rate)

  ! ----- OPENACC -----!
  ! NOTE(review): this region works on the host arrays A, B and C without any
  ! data clauses, so whatever implicit host/device data movement the compiler
  ! generates presumably falls inside the timed interval, unlike the two
  ! sections below, which start from data already resident in Ad/Bd.
  call system_clock(t0)
  !$acc kernels
  do j = 1, N
    do i = 1, M
      tmp = 0.0
      do l = 1, K
        tmp = tmp + A(i,l) * B(l,j)
      end do
      C(i,j) = tmp
    end do
  end do
  !$acc end kernels
  call system_clock(t1)
  write(*,*) C(M/2,N/2)
  write(*,*) "t openacc = ", elapsed_seconds(t0, t1, rate)

  ! ----- CUDA FORTRAN -----!
  call system_clock(t0)
  !$cuf kernel do(2) <<<*,*>>>
  do j = 1, N
    do i = 1, M
      tmp = 0.0
      do l = 1, K
        tmp = tmp + Ad(i,l) * Bd(l,j)
      end do
      Cd(i,j) = tmp
    end do
  end do
  ! The read-back of one element is deliberately inside the timed interval.
  ! NOTE(review): it presumably also acts as the synchronisation point that
  ! makes the host wait for the kernel to finish; confirm against the CUDA
  ! Fortran documentation.
  tmp = Cd(M/2,N/2)
  call system_clock(t1)
  write(*,*) tmp
  write(*,*) "t cuf = ", elapsed_seconds(t0, t1, rate)

  ! ----- CUBLAS -----!
  call system_clock(t0)
  ! Cd = 1.0 * Ad * Bd + 0.0 * Cd, no transposes, leading dimensions M, K, M.
  call sgemm('N', 'N', M, N, K, 1.0, Ad, M, Bd, K, 0.0, Cd, M)
  ! As above, the single-element read-back is part of the measurement.
  tmp = Cd(M/2,N/2)
  call system_clock(t1)
  write(*,*) tmp
  write(*,*) "t cublas = ", elapsed_seconds(t0, t1, rate)

  ! ----- CPU -----!
  call system_clock(t0)
  C = matmul(A, B)
  call system_clock(t1)
  write(*,*) C(M/2,N/2)
  write(*,*) "t cpu = ", elapsed_seconds(t0, t1, rate)

  deallocate(Ad, Bd, Cd)
  deallocate(A, B, C)

contains

  !> Convert two system_clock counts into elapsed seconds.
  !>
  !> start, finish    : counts returned by system_clock
  !> ticks_per_second : count rate returned by system_clock
  pure function elapsed_seconds(start, finish, ticks_per_second) result(seconds)
    integer(int64), intent(in) :: start, finish, ticks_per_second
    real(real64) :: seconds

    seconds = real(finish - start, real64) / real(ticks_per_second, real64)
  end function elapsed_seconds

end program test
! Sample output (as recorded with the original listing):
! 243.3233
! t openacc = 0.9237859000000000
! 243.3233
! t cuf = 0.2142858000000000
! 243.3233
! t cublas = 0.3022661000000000
! 243.3233
! t cpu = 40.91081810000000
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement