Advertisement
Guest User

Untitled

a guest
Apr 13th, 2012
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. module functions
  2.  
  3.     integer, parameter :: LUL=128
  4.     integer, parameter :: N=LUL, M=LUL, K=LUL
  5.     integer, parameter :: grid=N  !assume we have "cube" array
  6.     integer, parameter :: dimen=3
  7.     integer, parameter :: prk=4   !float is 4, double is 8
  8.     integer, parameter :: BLOCK_SIZE=16
  9.     integer, parameter :: ITERATIONS=10000000
  10.  
  11.     type Fluids3D
  12.         real(kind=prk) :: density(N, M, K), energy(N, M, K)
  13.         real(kind=prk) :: U(N, M, K, dimen)
  14.         real(kind=prk) :: r(N, M, K, dimen)
  15.     end type
  16.  
  17.     type Fluids
  18.         real(kind=prk) :: density(grid**dimen), energy(grid**dimen)
  19.         real(kind=prk) :: U(grid**dimen,dimen)
  20.         real(kind=prk) :: r(grid**dimen,dimen)
  21.     end type
  22.    
  23.     contains
  24.  
  25.     attributes(global) subroutine calculate3D(in, innext, outhalf)
  26.         type(Fluids3D), device :: in, outhalf, innext
  27.         integer :: i, j, k
  28.      
  29.         i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z
  30.         j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y
  31.         k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x
  32.  
  33.         outhalf%density(i, j, k) = ( in%density(i, j, k) + innext%density(i, j, k) ) / 2.0
  34.         outhalf%energy(i, j, k)  = ( in%energy(i, j, k)  + innext%energy(i, j, k) )  / 2.0
  35.         outhalf%r(i, j, k, :)    = ( in%r(i, j, k, :)    + innext%r(i, j, k, :) )    / 2.0
  36.         outhalf%U(i, j, k, :)    = ( in%U(i, j, k, :)    + innext%U(i, j, k, :) )    / 2.0
  37.     end subroutine calculate3D
  38.  
  39.     attributes(global) subroutine calculate1D(in, innext, outhalf)
  40.         type(Fluids), device :: in, outhalf, innext
  41.         integer :: i, j, k, idx
  42.      
  43.         i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
  44.         j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
  45.         k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
  46.  
  47.         idx = M * K * i + K * j + k + 1
  48.         outhalf%density(idx) = ( in%density(idx) + innext%density(idx) ) / 2.0
  49.         outhalf%energy(idx)  = ( in%energy(idx)  + innext%energy(idx) )  / 2.0
  50.         outhalf%r(idx,:)     = ( in%r(idx,:)     + innext%r(idx,:) )     / 2.0
  51.         outhalf%U(idx,:)     = ( in%U(idx,:)     + innext%U(idx,:) )     / 2.0
  52.     end subroutine calculate1D
  53. end module functions
  54.  
  55. program GPU
  56.     use cudafor
  57.     use cudadevice
  58.     use functions
  59.     implicit none
  60.     integer :: istat, counter=1
  61.     type(Fluids3D), allocatable, device :: DIM3_dev1, DIM3_dev2, DIM3_dev3
  62.     type(Fluids), allocatable, device   :: DIM1_dev1, DIM1_dev2, DIM1_dev3
  63.     type(Fluids3D), allocatable         :: DIM3_host1, DIM3_host2, DIM3_host3
  64.     type(Fluids), allocatable           :: DIM1_host1, DIM1_host2, DIM1_host3
  65.     type(dim3) :: blocks, threads
  66.     type (cudaEvent) :: startEvent, stopEvent, dummyEvent
  67.     real(kind=4) :: time
  68.  
  69.     allocate(DIM3_dev1, DIM3_dev2, DIM3_dev3)
  70.     allocate(DIM3_host1, DIM3_host2, DIM3_host3)
  71.     allocate(DIM1_dev1, DIM1_dev2, DIM1_dev3)
  72.     allocate(DIM1_host1, DIM1_host2, DIM1_host3)
  73.  
  74.     call random_seed
  75.     call random_number(DIM1_host1%density)
  76.     call random_number(DIM1_host1%energy)
  77.     call random_number(DIM1_host1%U)
  78.     call random_number(DIM1_host1%r)
  79.     call random_number(DIM1_host2%density)
  80.     call random_number(DIM1_host2%energy)
  81.     call random_number(DIM1_host2%U)
  82.     call random_number(DIM1_host2%r)
  83.     call random_number(DIM1_host3%density)
  84.     call random_number(DIM1_host3%energy)
  85.     call random_number(DIM1_host3%U)
  86.     call random_number(DIM1_host3%r)
  87.     call random_number(DIM3_host1%density)
  88.     call random_number(DIM3_host1%energy)
  89.     call random_number(DIM3_host1%U)
  90.     call random_number(DIM3_host1%r)
  91.     call random_number(DIM3_host2%density)
  92.     call random_number(DIM3_host2%energy)
  93.     call random_number(DIM3_host2%U)
  94.     call random_number(DIM3_host2%r)
  95.     call random_number(DIM3_host3%density)
  96.     call random_number(DIM3_host3%energy)
  97.     call random_number(DIM3_host3%U)
  98.     call random_number(DIM3_host3%r)
  99.  
  100.     DIM3_dev1 = DIM3_host1
  101.     DIM3_dev2 = DIM3_host2
  102.     DIM3_dev3 = DIM3_host3
  103.     DIM1_dev1 = DIM1_host1
  104.     DIM1_dev2 = DIM1_host2
  105.     DIM1_dev3 = DIM1_host3
  106.    
  107.     if (dimen == 1) then
  108.         if ( mod(N,BLOCK_SIZE) /= 0) then
  109.             blocks = dim3(N / BLOCK_SIZE + 1, 1, 1)
  110.         else
  111.             blocks = dim3(N / BLOCK_SIZE, 1, 1)
  112.         endif
  113.         threads = dim3(BLOCK_SIZE, 1, 1)
  114.     else if (dimen == 2) then
  115.         if ( (mod(N,BLOCK_SIZE) /= 0) .OR. (mod(K,BLOCK_SIZE) /= 0)) then
  116.             blocks = dim3(N / BLOCK_SIZE + 1, K / BLOCK_SIZE + 1, 1)
  117.         else
  118.             blocks = dim3(N / BLOCK_SIZE, K / BLOCK_SIZE, 1)
  119.         endif
  120.         threads = dim3(BLOCK_SIZE, BLOCK_SIZE, 1)
  121.     else
  122.         if ( (mod(N,BLOCK_SIZE) /= 0) .OR. (mod(K,BLOCK_SIZE) /= 0) .OR. (mod(M,BLOCK_SIZE) /= 0)) then
  123.             blocks = dim3(N / BLOCK_SIZE + 1, K / BLOCK_SIZE + 1, BLOCK_SIZE + 1)
  124.         else
  125.             blocks = dim3(N / BLOCK_SIZE, K / BLOCK_SIZE, M / BLOCK_SIZE)
  126.         endif
  127.         threads = dim3(BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE)
  128.     endif
  129.  
  130.     print *, "START CUDA 3D!"
  131.     ! run with time step 0.01,
  132.  
  133.     istat = cudaEventCreate ( startEvent )
  134.     istat = cudaEventCreate ( stopEvent )
  135.  
  136.     istat = cudaEventRecord ( startEvent , 0)
  137.  
  138.     do while (counter < ITERATIONS)
  139.         if (mod(counter, ITERATIONS/10) == 0) then
  140.             print *, int(real(counter)/real(ITERATIONS)*100.0), '% done'
  141.         endif
  142.         call calculate3D<<<blocks, threads>>>(DIM3_dev1, DIM3_dev2, DIM3_dev3)
  143.         call calculate3D<<<blocks, threads>>>(DIM3_dev2, DIM3_dev1, DIM3_dev3)
  144.         call calculate3D<<<blocks, threads>>>(DIM3_dev3, DIM3_dev2, DIM3_dev1)
  145.         counter = counter+1
  146.     enddo
  147.  
  148.     istat = cudaEventRecord ( stopEvent , 0)
  149.     istat = cudaEventSynchronize ( stopEvent )
  150.     istat = cudaEventElapsedTime ( time,startEvent,stopEvent )
  151.     print *, "DONE CUDA 3D! Time:", time
  152.  
  153.     print *, "START CUDA 1D!"
  154.  
  155.     istat = cudaEventRecord ( startEvent , 0)
  156.    
  157.     counter = 1
  158.  
  159.     do while (counter < ITERATIONS)
  160.         if (mod(counter, ITERATIONS/10) == 0) then
  161.             print *, int(real(counter)/real(ITERATIONS)*100.0), '% done'
  162.         endif
  163.         call calculate1D<<<blocks, threads>>>(DIM1_dev1, DIM1_dev2, DIM1_dev3)
  164.         call calculate1D<<<blocks, threads>>>(DIM1_dev2, DIM1_dev1, DIM1_dev3)
  165.         call calculate1D<<<blocks, threads>>>(DIM1_dev3, DIM1_dev2, DIM1_dev1)
  166.         counter = counter+1
  167.     enddo
  168.  
  169.     istat = cudaEventRecord ( stopEvent , 0)
  170.     istat = cudaEventSynchronize ( stopEvent )
  171.     istat = cudaEventElapsedTime ( time,startEvent,stopEvent )
  172.     print *, "DONE CUDA 1D! Time:", time
  173.  
  174.     deallocate(DIM3_dev1, DIM3_dev2, DIM3_dev3)
  175.     deallocate(DIM3_host1, DIM3_host2, DIM3_host3)
  176.     deallocate(DIM1_dev1, DIM1_dev2, DIM1_dev3)
  177.     deallocate(DIM1_host1, DIM1_host2, DIM1_host3)
  178.  
  179. end program GPU
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement