Advertisement
Guest User

Untitled

a guest
Jan 27th, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  !> Benchmark a single-precision matrix product C = A*B, with A of shape
  !> (M,K) and B of shape (K,N), computed four ways. For each variant one
  !> sample element C(M/2,N/2) and the elapsed wall-clock time are printed:
  !>   1. an OpenACC `kernels` region over the host arrays,
  !>   2. a CUDA Fortran `!$cuf kernel do` loop nest over the device arrays,
  !>   3. cuBLAS `sgemm` over the device arrays,
  !>   4. the `matmul` intrinsic over the host arrays.
  program test
      use cublas
      use, intrinsic :: iso_fortran_env, only: int64, real32, real64
      implicit none

      integer, parameter :: sp = real32, dp = real64
      integer, parameter :: M = 10000, N = 10000, K = 1000

      integer(int64) :: t0, t1, rate
      integer :: i, j, l, istat
      real(sp) :: tmp

      ! The matrices total roughly 480 MB on the host and again on the device,
      ! so they are allocated dynamically and the allocation status is checked.
      real(sp), allocatable :: A(:,:), B(:,:), C(:,:)
      real(sp), allocatable, device :: Ad(:,:), Bd(:,:), Cd(:,:)

      allocate(A(M,K), B(K,N), C(M,N), stat=istat)
      if (istat /= 0) error stop "test: host allocation failed"
      allocate(Ad(M,K), Bd(K,N), Cd(M,N), stat=istat)
      if (istat /= 0) error stop "test: device allocation failed"

      call random_number(A)
      call random_number(B)

      ! Copy the inputs to the device once, outside every timed section.
      Ad = A
      Bd = B

      ! ----- OPENACC -----!
      ! This variant works on the host arrays A, B and C.
      ! NOTE(review): the timed region therefore presumably also covers the
      ! implicit host<->device transfers generated for them - confirm.
      call system_clock(t0, rate)
      !$acc kernels
      do j = 1, N
          do i = 1, M
              tmp = 0.0_sp
              do l = 1, K
                  tmp = tmp + A(i,l) * B(l,j)
              end do
              C(i,j) = tmp
          end do
      end do
      !$acc end kernels
      call system_clock(t1, rate)

      write(*,*) C(M/2,N/2)
      write(*,*) "t openacc = ", real(t1 - t0, dp) / real(rate, dp)

      ! ----- CUDA FORTRAN -----!
      call system_clock(t0, rate)
      !$cuf kernel do(2) <<<*,*>>>
      do j = 1, N
          do i = 1, M
              tmp = 0.0_sp
              do l = 1, K
                  tmp = tmp + Ad(i,l) * Bd(l,j)
              end do
              Cd(i,j) = tmp
          end do
      end do
      ! Reading one element back to the host is done inside the timed section.
      ! NOTE(review): this copy is presumably what makes the host wait for the
      ! kernel to finish before the clock is read - confirm.
      tmp = Cd(M/2,N/2)
      call system_clock(t1, rate)
      write(*,*) tmp
      write(*,*) "t cuf = ", real(t1 - t0, dp) / real(rate, dp)

      ! ----- CUBLAS -----!
      ! Untimed warm-up: this is the first cuBLAS call in the program.
      ! NOTE(review): the first call is expected to include one-off library
      ! initialisation, which would otherwise be charged to the measurement
      ! below - verify on the target toolchain.
      call sgemm('N','N',M,N,K,1.0_sp,Ad,M,Bd,K,0.0_sp,Cd,M)
      tmp = Cd(M/2,N/2)

      call system_clock(t0, rate)
      call sgemm('N','N',M,N,K,1.0_sp,Ad,M,Bd,K,0.0_sp,Cd,M)
      tmp = Cd(M/2,N/2)
      call system_clock(t1, rate)
      write(*,*) tmp
      write(*,*) "t cublas = ", real(t1 - t0, dp) / real(rate, dp)

      ! ----- CPU -----!
      call system_clock(t0, rate)
      C = matmul(A,B)
      call system_clock(t1, rate)
      write(*,*) C(M/2,N/2)
      write(*,*) "t cpu = ", real(t1 - t0, dp) / real(rate, dp)
  end program test


  ! Sample output, recorded before the untimed cuBLAS warm-up call was added
  ! (timings are machine-dependent; the cuBLAS figure is expected to change):
  !     243.3233
  !  t openacc =    0.9237859000000000
  !     243.3233
  !  t cuf =    0.2142858000000000
  !     243.3233
  !  t cublas =    0.3022661000000000
  !     243.3233
  !  t cpu =     40.91081810000000
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement