! coursework_2011.cuf
!
! FUNCTIONS:
! COURSEW - Entry point of console application.
!
!
!
!****************************************************************************
!
! PROGRAM: GPU_cSPH_TVD
!
! PURPOSE: Use GPU computing for cSPH-TVD method
!
!****************************************************************************
module thrust
interface thrustsort
subroutine sort_int( input,N) bind(C,name="sort_int_wrapper")
use iso_c_binding
integer(c_int),device:: input(*)
integer(c_int),value:: N
end subroutine
subroutine sort_float( input,N) bind(C,name="sort_float_wrapper")
use iso_c_binding
real(c_float),device:: input(*)
integer(c_int),value:: N
end subroutine
subroutine sort_double( input,N) bind(C,name="sort_double_wrapper")
use iso_c_binding
real(c_double),device:: input(*)
integer(c_int),value:: N
end subroutine
end interface
end module thrust
module fluid
use iso_c_binding
implicit none
integer, parameter :: dimen = 1
integer, parameter :: grid = 512
integer, parameter :: prk = 4
type Fluids
! density = P, energy = e
! U = (U_x, U_y, U_z)
! r = (x, y, z)
real(kind=prk) :: density(grid**dimen), energy(grid**dimen)
real(kind=prk) :: U(grid**dimen,dimen) !U_x(:), U_y(:), U_z(:) ! speed components
real(kind=prk) :: r(grid**dimen,dimen) !x(:), y(:), z(:)
end type
type test
real, allocatable :: p(:)
end type
type Fluidstemp
sequence
real(kind=prk) :: z, energy, density
real(kind=prk) :: U(dimen)
real(kind=prk) :: r(dimen)
end type
type Parameters
real(kind=prk) dx ! center of cells = dx/2 + i*dx
integer Nx, Ny, Nz ! dimension number of cells, example: Nx = 10, Ny = 15, Nz = 20 -> [10x15x20]
integer N !total number of cells: [10x15x20] = 3000
real(kind=prk) monaghan_multiplier, smoothing_length
real(kind=prk) gamma, rho_0, c_0, p_0, cnst, eps, courant
real(kind=prk) :: nablaW0(3 ** dimen, dimen)
real(kind=prk) :: ta, tb, tc, minus
integer totalcells, neighbourscount
integer dimensions, dim2d, dim3d !1D, 2D or 3D
integer WNx, WNy
end type
end module fluid
module CUDASphTvd
use cudafor
use cudadevice
use thrust
use fluid
integer, parameter :: BLOCK_SIZE=16
type(Fluids), allocatable, device :: allfluid, allfluidnext, allfluidhalf, allforces !allfluidhalf = (allfluid+allfluidnext) / 2
type(Fluids), allocatable, device :: allfluxes(:)
real(kind=prk), allocatable, device :: time_steps(:)
logical, allocatable, device :: boolflags(:)
type(Parameters), constant :: params
real(kind=prk), allocatable, device :: tau
real(kind=prk), allocatable, device :: tautemp(:)
contains
attributes(global) subroutine cuda_calculate_forces_sph_predictor(in, outforce)
type(Fluids), device :: in, outforce
integer :: i, j, k, idx, idx_k
!temporary variables for speed-up:
real(kind=prk) :: H_i, H_k, sqrU_i
!temporary 'fluid' variables:
real(kind=prk) :: i_density, i_energy, theta_k, tmp
real(kind=prk) :: Ui(dimen), i_impulse(dimen)
real(kind=prk) :: Uk(dimen), tmpW(dimen)
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
sqrU_i = 0.0
!calculate Fi:
!get fluid_i characteristics (save in temp):
if (in%density(idx) > params%eps) then
Ui(1:params%dimensions) = in%U(idx,:) / in%density(idx)
else
Ui = 0.0
endif
i_density = in%density(idx)
i_energy = in%energy(idx)
i_impulse = 0.0
!calculate H_i
sqrU_i = dot_product(Ui, Ui)
tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
if (tmp > params%eps) then
H_i = sqrt(tmp)
else
H_i = 0.0
endif
!print *, "H_i", tmp
!calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
H_k = 0.0
theta_k = 0.0
i_energy= 0.0
!sum_{k} {Ui +Uk} = sum_{k} {Ui} + sum_{k} {Uk} = k * Ui + sum_{k} {Uk} :
do i_k = i - 1, i + 1
if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
do j_k = j - 1, j + 1
if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
do k_k = k - 1, k + 1
if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
if (in%density(idx_k) > params%eps) then
Uk(1:params%dimensions) = in%U(idx_k,:) / in%density(idx_k)
else
Uk = 0.0
endif
sqrU_i = dot_product(Uk, Uk)
tmp = 2.0 * ( (params%gamma - 1.0) * (in%energy(idx_k) - (in%density(idx_k) * sqrU_i) / 2.0))
if (tmp > params%eps) then
H_k = sqrt(tmp)
else
H_k = 0.0
endif
tmpW(1:params%dimensions) = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
if (H_i > params%eps) then
i_impulse = i_impulse - tmpW * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0 - ftheta_k(idx_k) * dot_product(in%U(idx,:), tmpW) / H_i
else
i_impulse = i_impulse - tmpW * H_k
i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0
endif
endif
enddo
endif
enddo
endif
enddo
!calculate predictor forces:
i_impulse = H_i * i_impulse
i_energy = H_i * i_energy
outforce%density(idx) = 0.0
if (i_density > params%eps) then
!print *, "i_impulse, i_energy", i_impulse, i_energy
outforce%U(idx,:) = i_impulse(1:params%dimensions)
outforce%energy(idx) = i_energy
else
outforce%U(idx,:) = 0.0
outforce%energy(idx) = 0.0
endif
end subroutine
attributes(device) function ftheta_k(indx)
real(kind=prk) :: ftheta_k, coord
integer :: indx
coord = params%smoothing_length / 2.0 + real(indx, kind=prk) * params%smoothing_length
if (indx < params%totalcells/3 .AND. indx > params%totalcells/6) then
! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
ftheta_k = params%ta * coord ** 2 + params%tb * coord + params%tc
else
ftheta_k = 0.0
endif
ftheta_k = 0.0
end function ftheta_k
attributes(global) subroutine cuda_calculate_dt(in, outdt)
type(Fluids), device :: in
real(kind=prk), dimension(:), device :: outdt
real(kind=prk) :: Cs
integer :: idx, i, j, k
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
outdt(idx) = 10000000.0
if (in%density(idx) > params%eps) then
Cs = params%gamma * (params%gamma - 1.0) * (in%energy(idx) - in%density(idx)*dot_product(in%U(idx,:) / in%density(idx),in%U(idx,:) / in%density(idx)) / 2.0) / in%density(idx)
Cs = (abs(Cs > params%eps) * sqrt(abs(Cs)))
outdt(idx) = params%courant * min(params%smoothing_length / (2.0 * maxval(abs(in%U(idx,:) / in%density(idx)))), params%smoothing_length / (maxval(abs(in%U(idx,:) / in%density(idx))) + Cs))
endif
end subroutine cuda_calculate_dt
attributes(global) subroutine cuda_calculate_predictor_sph(in, inforce, outhalf, tau)
integer idx, i, j, k
!temporary 'fluid' variables:
real(kind=prk), device :: tau
type(Fluids), device :: in, inforce, outhalf
!formula 16:
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
!calculate H_i:
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
!calculate predictor based on calculated forces inb4:
outhalf%density(idx) = in%density(idx) + (tau / 2.0) * inforce%density(idx)
if (in%density(idx) > params%eps) then
outhalf%U(idx,:) = in%U(idx,:) + tau * inforce%U(idx,:) / 2.0
outhalf%energy(idx) = in%energy(idx) + tau * inforce%energy(idx) / 2.0
outhalf%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / in%density(idx) + outhalf%U(idx,:) / outhalf%density(idx) ) / 4.0
else
outhalf%U(idx,:) = 0.0
outhalf%energy(idx) = 0.0
outhalf%r(idx,:) = in%r(idx,:)
endif
end subroutine cuda_calculate_predictor_sph
attributes(global) subroutine cuda_calculate_forces_sph_corrector(in, inhalf, outnext)
integer idx, idx_k, i, j, k, i_k, j_k, k_k
!temporary variables for speed-up:
real(kind=prk) H_i, H_k, sqrU_i
!temporary 'fluid' variables:
real(kind=prk) i_density, i_energy, theta_k, tmp
real(kind=prk) :: Ui(dimen), ri(dimen), i_impulse(dimen)
real(kind=prk) :: Uk(dimen), rk(dimen), Wr(dimen), tmpW(dimen)
type(Fluids), device :: in, inhalf, outnext
!formula 16:
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
!calculate H_i:
sqrU_i = 0.0
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
!calculate Fi:
!get fluid_i characteristics (save in temp):
if (inhalf%density(idx) > params%eps) then
Ui(1:params%dimensions) = inhalf%U(idx,:) / inhalf%density(idx)
else
Ui = 0.0
endif
ri(1:params%dimensions) = inhalf%r(idx,:)
i_density = inhalf%density(idx)
i_energy = inhalf%energy(idx)
i_impulse = 0.0
!calculate H_i
sqrU_i = dot_product(Ui, Ui)
tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
if (tmp > params%eps) then
H_i = sqrt(tmp)
else
H_i = 0.0
endif
!calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
H_k = 0.0
theta_k = 0.0
i_energy= 0.0
do i_k = i - 1, i + 1
if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
do j_k = j - 1, j + 1
if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
do k_k = k - 1, k + 1
if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
if (inhalf%density(idx_k) > params%eps) then
Uk(1:params%dimensions) = inhalf%U(idx_k,:) / inhalf%density(idx_k)
else
Uk = 0.0
endif
sqrU_i = dot_product(Uk, Uk)
tmp = 2.0 * ( (params%gamma - 1.0) * (inhalf%energy(idx_k) - (inhalf%density(idx_k) * sqrU_i) / 2.0) )
if (tmp > params%eps) then
H_k = sqrt(tmp)
else
H_k = 0.0
endif
rk(1:params%dimensions) = inhalf%r(idx_k,:)
call calculateSmoothingGradientW(ri(1:params%dimensions), rk(1:params%dimensions), Wr(1:params%dimensions))
tmpW = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
if (H_i > params%eps) then
i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0 - ftheta_k(idx_k) * dot_product(inhalf%U(idx,:), tmpW) / H_i
else
i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k
i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0
endif
endif
enddo
endif
enddo
endif
enddo
!calculate corrector forces:
i_impulse(1:params%dimensions) = H_i * i_impulse(1:params%dimensions)
i_energy = H_i * i_energy
! call syncthreads()
outnext%density(idx) = in%density(idx) + 0.0 ! + 0.0 * (tau / 2.0)
if (i_density > params%eps) then
outnext%U(idx,:) = in%U(idx,:) + tau * i_impulse(1:params%dimensions)
outnext%energy(idx) = in%energy(idx) + tau * i_energy
outnext%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / i_density + outnext%U(idx,:) / i_density ) / 2.0
else
outnext%U(idx,:) = 0.0
outnext%energy(idx) = 0.0
outnext%r(idx,:) = in%r(idx,:)
endif
!calculate q(n+1/2):
! inhalf%density(idx) = ( in%density(idx) + outnext%density(idx) ) / 2.0
! inhalf%energy(idx) = ( in%energy(idx) + outnext%energy(idx) ) / 2.0
! inhalf%r(idx,:) = ( in%r(idx,:) + outnext%r(idx,:) ) / 2.0
! inhalf%U(idx,:) = ( in%U(idx,:) + outnext%U(idx,:) ) / 2.0
end subroutine cuda_calculate_forces_sph_corrector
attributes(global) subroutine cuda_calculate_fluidhalf(in, innext, outhalf)
type(Fluids), device :: in, outhalf, innext
integer :: i, j, k, idx
!calculate q(n+1/2):
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
outhalf%density(idx) = ( in%density(idx) + innext%density(idx) ) / 2.0
outhalf%energy(idx) = ( in%energy(idx) + innext%energy(idx) ) / 2.0
outhalf%r(idx,:) = ( in%r(idx,:) + innext%r(idx,:) ) / 2.0
outhalf%U(idx,:) = ( in%U(idx,:) + innext%U(idx,:) ) / 2.0
end subroutine cuda_calculate_fluidhalf
attributes(global) subroutine cuda_calculate_flux_tvd(in, inhalf, outq0, outfluxes, outflags)
integer idx, idx_k, i, j, k, i_k, j_k, k_k, dims, dim2dim, dim3dim
integer, dimension(:) :: offset(3), ijk(3)
logical, dimension(:), device :: outflags
type(Fluids), device :: in, inhalf, outq0 !in = n+1 from corrector !outq0 = n
type(Fluids), device :: outfluxes(dimen)
type(Fluidstemp) :: neighbrs(3 + (dimen - 1) * 2), fluidhalf(4), ql, qr, Fl, Fr
real(kind=prk) :: dr1, dr2, dr3, L, pl, Cl
real(kind=prk) :: Vrl(dimen), Vrr(dimen)
real(kind=prk) :: aa
! print *, "calculate_flux_tvd"
! neighbours = 3 + (params%dimensions - 1) * 2
! print *, "calculate_flux_tvd debug"
neighbrs%density = 0.0
neighbrs%energy = 0.0
fluidhalf%density = 0.0
fluidhalf%energy = 0.0
do i = 1, params%dimensions
neighbrs(:)%r(i) = 0.0
neighbrs(:)%U(i) = 0.0
fluidhalf(:)%r(i) = 0.0
fluidhalf(:)%U(i) = 0.0
enddo
offset(1) = 1
offset(2) = params%Nx
offset(3) = params%Nx * params%Ny
dim2dim = abs(params%dimensions == 1) * 1
dim3dim = abs(params%dimensions < 3) * 1
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
do dims = 1, params%dimensions
outfluxes(dims)%density(idx) = 0.0
outfluxes(dims)%energy(idx) = 0.0
outfluxes(dims)%U(idx,:) = 0.0
enddo
!for each particle do:
if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-2 + dim3dim) ) then
if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-2 + dim2dim) ) then
if ( (k >= 2) .AND. (k <= params%Nx-2) ) then
ijk(1) = k; ijk(2) = j; ijk(3) = i;
!get center cell (we will calculate fluxes relatively this cell):
! print *, "idx, ", idx
! print *, "ijk, ", ijk
!take all neighbours (1d: 3, 2d: 5(cross), 3d: 7(3d-cross)):
if ( (k - 1 >= 0) .AND. (k - 1 < params%Nx) ) then
neighbrs(1)%density = in%density(idx-1)
neighbrs(1)%energy = in%energy(idx-1)
neighbrs(1)%r(:) = in%r(idx-1,:)
neighbrs(1)%U(:) = in%U(idx-1,:)
endif
neighbrs(2)%density = in%density(idx)
neighbrs(2)%energy = in%energy(idx)
neighbrs(2)%r(:) = in%r(idx,:)
neighbrs(2)%U(:) = in%U(idx,:)
if ( (k + 1 >= 0) .AND. (k + 1 < params%Nx) ) then
neighbrs(3)%density = in%density(idx+1)
neighbrs(3)%energy = in%energy(idx+1)
neighbrs(3)%r(:) = in%r(idx+1,:)
neighbrs(3)%U(:) = in%U(idx+1,:)
endif
!add neighbours if dim > 1:
if ( params%dimensions > 1 ) then
if ( (j - 1 >= 0) .AND. (j - 1 < params%Ny) ) then
idx_k = idx - params%Nx !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
neighbrs(4)%density = in%density(idx_k)
neighbrs(4)%energy = in%energy(idx_k)
neighbrs(4)%r(:) = in%r(idx_k,:)
neighbrs(4)%U(:) = in%U(idx_k,:)
endif
if ( (j + 1 >= 0) .AND. (j + 1 < params%Ny) ) then
idx_k = idx + params%Nx
neighbrs(5)%density = in%density(idx_k)
neighbrs(5)%energy = in%energy(idx_k)
neighbrs(5)%r(:) = in%r(idx_k,:)
neighbrs(5)%U(:) = in%U(idx_k,:)
endif
endif
!add neighbours if dim > 2:
if ( params%dimensions > 2 ) then
if ( (i - 1 >= 0) .AND. (i - 1 < params%Nz) ) then
idx_k = idx - params%Nx * params%Ny !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
neighbrs(6)%density = in%density(idx_k)
neighbrs(6)%energy = in%energy(idx_k)
neighbrs(6)%r(:) = in%r(idx_k,:)
neighbrs(6)%U(:) = in%U(idx_k,:)
endif
if ( (i + 1 >= 0) .AND. (i + 1 < params%Nz) ) then
idx_k = idx + params%Nx * params%Ny
neighbrs(7)%density = in%density(idx_k)
neighbrs(7)%energy = in%energy(idx_k)
neighbrs(7)%r(:) = in%r(idx_k,:)
neighbrs(7)%U(:) = in%U(idx_k,:)
endif
endif
!Okay, now lets decide if we need to calculate fluxes (check density):
!take components based on current dimension:
if (maxval(neighbrs%density) > params%eps) then
outflags(idx) = .TRUE.
do dims = 1, params%dimensions
do i_k = 0, 3
fluidhalf(i_k+1)%density = inhalf%density(idx + offset(dims) * (i_k - 2))
fluidhalf(i_k+1)%energy = inhalf%energy(idx + offset(dims) * (i_k - 2))
fluidhalf(i_k+1)%U(:) = inhalf%U(idx + offset(dims) * (i_k - 2),:)
fluidhalf(i_k+1)%r(:) = inhalf%r(idx + offset(dims) * (i_k - 2),:)
enddo
!calc ql:
dr1 = fluidhalf(2)%r(dims) - fluidhalf(1)%r(dims)
dr2 = fluidhalf(3)%r(dims) - fluidhalf(2)%r(dims)
dr3 = fluidhalf(4)%r(dims) - fluidhalf(3)%r(dims)
if (fluidhalf(2)%density > params%eps) then
drx = (params%smoothing_length / 2.0 - (fluidhalf(2)%r(dims) - (params%dx/2 + (ijk(dims) - 1) * params%dx)) / 2.0)
call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(2)%density - fluidhalf(1)%density) / dr1, L)
ql%density = fluidhalf(2)%density + drx * L
call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(2)%energy - fluidhalf(1)%energy) / dr1, L)
ql%energy = fluidhalf(2)%energy + drx * L
do j_k = 1, params%dimensions
call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(2)%U(j_k) - fluidhalf(1)%U(j_k)) / dr1, L)
ql%U(j_k) = fluidhalf(2)%U(j_k) + drx * L
enddo
! Vrl = ql%U(1,:) !Vrl(1) = Vxl, Vrl(2) = Vyl, Vrl(3) = Vzl
if (ql%density > params%eps) then
Vrl = ql%U(:) / ql%density
pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
Cl = sqrt(params%gamma * pl / ql%density)
else
Vrl = 0.0
pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
Cl = 0.0
endif
Fl%density = ql%U(dims)
Fl%U(:) = ql%U(:) * Vrl(dims)
Fl%energy = ql%energy * Vrl(dims)
else
ql%density = 0.0
ql%energy = 0.0
ql%U(:) = 0.0
Fl%density = 0.0
Fl%energy = 0.0
Fl%U(:) = 0.0
Cl = 0.0
Vrl = 0.0
endif
! print *, "t_debug4"
!calc qr:
if (fluidhalf(3)%density > params%eps) then
drx = (params%smoothing_length / 2.0 + (fluidhalf(3)%r(dims) - (params%dx / 2.0 + ijk(dims) * params%dx)) / 2.0)
call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(4)%density - fluidhalf(3)%density) / dr3, L)
qr%density = fluidhalf(3)%density - drx * L
call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(4)%energy - fluidhalf(3)%energy) / dr3, L)
qr%energy = fluidhalf(3)%energy - drx * L
do j_k = 1, params%dimensions
call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(4)%U(j_k) - fluidhalf(3)%U(j_k)) / dr3, L)
qr%U(j_k) = fluidhalf(3)%U(j_k) - drx * L
enddo
! Vrr = qr%U(1,:) !Vrr(1) = Vxr, Vrr(2) = Vyr, Vrr(3) = Vzr
if (qr%density > params%eps) then
Vrr = qr%U(:) / qr%density
pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
Cr = sqrt(params%gamma * pr / qr%density)
else
Cr = 0.0
Vrr = 0.0
pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
endif
Fr%density = qr%U(dims)
Fr%U(:) = qr%U(:) * Vrr(dims)
Fr%energy = qr%energy * Vrr(dims)
else
qr%density = 0.0
qr%energy = 0.0
qr%U(:) = 0.0
Fr%density = 0.0
Fr%energy = 0.0
Fr%U(:) = 0.0
Cr = 0.0
Vrr = 0.0
endif
!calc Fx:
if ((Cl > params%eps) .OR. (Cr > params%eps)) then
Ll = min(Vrl(dims) - Cl, Vrr(dims) - Cr)
Lr = max(Vrr(dims) + Cr, Vrl(dims) + Cl)
bb = max(abs(Ll), abs(Lr))
outfluxes(dims)%density(idx) = ( Fr%density + Fl%density + bb * (ql%density - qr%density) ) / 2.0
outfluxes(dims)%energy(idx) = ( Fr%energy + Fl%energy + bb * (ql%energy - qr%energy) ) / 2.0
outfluxes(dims)%U(idx,:) = ( Fr%U(:) + Fl%U(:) + bb * (ql%U(:) - qr%U(:)) ) / 2.0
else
outfluxes(dims)%density(idx) = 0.0
outfluxes(dims)%energy(idx) = 0.0
outfluxes(dims)%U(idx,:) = 0.0
endif
enddo
else
outflags(idx) = .FALSE.
do dims = 1, params%dimensions
outfluxes(dims)%density(idx) = 0.0
outfluxes(dims)%energy(idx) = 0.0
outfluxes(dims)%U(idx,:) = 0.0
enddo
endif
! outfluxes(:)%density(idx) = 0.0
endif
endif
endif
end subroutine cuda_calculate_flux_tvd
attributes(global) subroutine cuda_finalize(outq0, in, influxes, inflags)
type(Fluids), device :: in, outq0 !in = n+1 from corrector !outq0 = n
type(Fluids), device :: influxes(dimen)
type(Fluidstemp) :: fluidhalf(dimen)
integer, dimension(:) :: offset(3)
logical, dimension(:), device :: inflags
integer :: dims, i, j, k, idx
offset(1) = 1
offset(2) = params%Nx
offset(3) = params%Nx * params%Ny
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-3 + params%dim3d) ) then
if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-3 + params%dim2d) ) then
if ( (k >= 2) .AND. (k <= params%Nx-3) ) then
if (inflags(idx)) then
!calculate all neighbour fluxes (idx + offset(dims)):
do dims = 1, params%dimensions
fluidhalf(dims)%density = influxes(dims)%density(idx + offset(dims))
fluidhalf(dims)%energy = influxes(dims)%energy(idx + offset(dims))
fluidhalf(dims)%U(:) = influxes(dims)%U(idx + offset(dims),:)
enddo
!calculate final values:
outq0%density(idx) = in%density(idx) + tau / params%smoothing_length * (sum(influxes(:)%density(idx)) - sum(fluidhalf(1:params%dimensions)%density))
outq0%energy(idx) = in%energy(idx) + tau / params%smoothing_length * (sum(influxes(:)%energy(idx)) - sum(fluidhalf(1:params%dimensions)%energy))
do dims = 1, params%dimensions
outq0%U(idx,dims) = in%U(idx,dims) + tau / params%smoothing_length * (sum(influxes(:)%U(idx,dims)) - sum(fluidhalf(1:params%dimensions)%U(dims)))
enddo
else
outq0%density(idx) = in%density(idx)
outq0%energy(idx) = in%energy(idx)
outq0%U(idx,:) = in%U(idx,:)
endif
endif
endif
endif
end subroutine cuda_finalize
attributes(device) subroutine minmod(a, b, out)
real(kind=prk), device :: a, b, out
out = ( 1.0 / 2.0) * (sign(1.0, a) + sign(1.0, b)) * min(abs(a), abs(b))
end subroutine minmod
subroutine cuda_setupSystem(inparams)
end subroutine
attributes(global) subroutine cuda_boundary_conditions(ptr)
integer j, k, idx, idx_j, idx_k
type(Fluids), device :: ptr
integer ti, tj, tk, tidx
ti = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
tj = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
tk = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
! idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
if (params%dimensions == 1) then
ptr%density(1) = ptr%density(3)
ptr%density(2) = ptr%density(1)
ptr%density(params%Nx) = ptr%density(params%Nx - 2)
ptr%density(params%Nx-1) = ptr%density(params%Nx)
ptr%energy(1) = ptr%energy(3)
ptr%energy(2) = ptr%energy(1)
ptr%energy(params%Nx) = ptr%energy(params%Nx - 2)
ptr%energy(params%Nx-1) = ptr%energy(params%Nx)
ptr%U(1,1) = ptr%U(3,1)
ptr%U(2,1) = ptr%U(1,1)
ptr%U(params%Nx,1) = ptr%U(params%Nx - 2,1)
ptr%U(params%Nx-1,1) = ptr%U(params%Nx,1)
endif
if (params%dimensions == 2) then
! copy x:
if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
! do k = 0, params%Nx-1
idx = k + 1
idx_j = params%Nx + idx
idx_k = 2 * params%Nx + idx
ptr%density(idx) = ptr%density(idx_k)
ptr%density(idx_j) = ptr%density(idx)
ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
ptr%energy(idx) = ptr%energy(idx_k)
ptr%energy(idx_j) = ptr%energy(idx)
ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
ptr%U(idx,1) = ptr%U(idx_k,1)
ptr%U(idx_j,1) = ptr%U(idx,1)
ptr%U(idx,2) = ptr%U(idx_k,2)
ptr%U(idx_j,2) = ptr%U(idx,2)
ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
endif
! copy y:
if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
! do k = 0, params%Ny-1
idx = params%Nx * j + 1
idx_j = idx + 1
idx_k = idx + 2
ptr%density(idx) = ptr%density(idx_k)
ptr%density(idx_j) = ptr%density(idx)
ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
ptr%energy(idx) = ptr%energy(idx_k)
ptr%energy(idx_j) = ptr%energy(idx)
ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
ptr%U(idx,1) = ptr%U(idx_k,1)
ptr%U(idx_j,1) = ptr%U(idx,1)
ptr%U(idx,2) = ptr%U(idx_k,2)
ptr%U(idx_j,2) = ptr%U(idx,2)
ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
endif
endif
if (params%dimensions == 3) then
!copy (for [y x z])
if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
if ( (k >= 0) .AND. (k <= params%Ny-1) ) then
idx = params%Nx * params%Ny * j + params%Nx * k + 1
idx_j = idx + 1
idx_k = idx + 2
ptr%density(idx) = ptr%density(idx_k)
ptr%density(idx_j) = ptr%density(idx)
ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
ptr%energy(idx) = ptr%energy(idx_k)
ptr%energy(idx_j) = ptr%energy(idx)
ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
ptr%U(idx,1) = ptr%U(idx_k,1)
ptr%U(idx_j,1) = ptr%U(idx,1)
ptr%U(idx,2) = ptr%U(idx_k,2)
ptr%U(idx_j,2) = ptr%U(idx,2)
ptr%U(idx,3) = ptr%U(idx_k,3)
ptr%U(idx_j,3) = ptr%U(idx,3)
ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
ptr%U(params%Nx + idx - 1,3) = ptr%U(params%Nx + idx - 3,3)
ptr%U(params%Nx + idx - 2,3) = ptr%U(params%Nx + idx - 1,3)
endif
endif
!copy (for [x x z])
if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
idx = params%Nx * params%Ny * j + k + 1
idx_j = idx + params%Nx
idx_k = idx + 2 * params%Nx
ptr%density(idx) = ptr%density(idx_k)
ptr%density(idx_j) = ptr%density(idx)
ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
ptr%energy(idx) = ptr%energy(idx_k)
ptr%energy(idx_j) = ptr%energy(idx)
ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
ptr%U(idx,1) = ptr%U(idx_k,1)
ptr%U(idx_j,1) = ptr%U(idx,1)
ptr%U(idx,2) = ptr%U(idx_k,2)
ptr%U(idx_j,2) = ptr%U(idx,2)
ptr%U(idx,3) = ptr%U(idx_k,3)
ptr%U(idx_j,3) = ptr%U(idx,3)
ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
ptr%U((params%Ny - 1) * params%Nx + idx,3) = ptr%U((params%Ny - 3) * params%Nx + idx,3)
ptr%U((params%Ny - 2) * params%Nx + idx,3) = ptr%U((params%Ny - 1) * params%Nx + idx,3)
endif
endif
!copy (for [x x y])
if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
idx = params%Nx * j + k + 1
idx_j = idx + params%Nx * params%Ny
idx_k = idx + params%Nx * params%Ny * 2
ptr%density(idx) = ptr%density(idx_k)
ptr%density(idx_j) = ptr%density(idx)
ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 3) + idx)
ptr%density(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx)
ptr%energy(idx) = ptr%energy(idx_k)
ptr%energy(idx_j) = ptr%energy(idx)
ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 3) + idx)
ptr%energy(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx)
ptr%U(idx,1) = ptr%U(idx_k,1)
ptr%U(idx_j,1) = ptr%U(idx,1)
ptr%U(idx,2) = ptr%U(idx_k,2)
ptr%U(idx_j,2) = ptr%U(idx,2)
ptr%U(idx,3) = ptr%U(idx_k,3)
ptr%U(idx_j,3) = ptr%U(idx,3)
ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,1)
ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1)
ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,2)
ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2)
ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,3)
ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3)
endif
endif
endif
end subroutine
attributes(global) subroutine initial_conditions(allf)
type(Fluids), device :: allf
integer :: i, j, k, dims, ijk(3), N
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
N = params%Nx
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
ijk(1) = k
ijk(2) = j
ijk(3) = i
! print *, "idx", idx
! print *, "ijk", i, j, k
if (idx > (params%totalcells * 0.51) .AND. idx < (params%totalcells * 0.61)) then ! wave
allfluid%density(idx) = 11.3
allfluid%U(idx,:) = -22.2
allfluid%energy(idx) = 45.0
! if (idx < (params%totalcells * 0.4)) then
else if (idx > (params%totalcells * 0.39) .AND. idx < (params%totalcells * 0.49)) then ! wave
allfluid%density(idx) = 11.3
allfluid%U(idx,:) = 22.2
allfluid%energy(idx) = 45.0
else
allfluid%density(idx) = 11.3
allfluid%U(idx,:) = 0.0
allfluid%energy(idx) = 0.0
endif
! else
! allfluid%density(idx) = 20.0
! allfluid%U(idx,:) = 0.0
! allfluid%energy(idx) = 0.0
! endif
do dims = 1, params%dimensions
allf%r(idx,dims) = params%dx / 2.0 + ijk(dims) * params%dx
enddo
end subroutine
subroutine calculateSmoothingGradientW0(inp)
type(Parameters) :: inp
real(kind=prk) dW, s
real(kind=prk) dr_ik(inp%dimensions)
integer i, j, k, i_max, j_max, k_max, dims, ijk(inp%dimensions)
if ( inp%dimensions == 3 ) then
i_max = 3; j_max = 3; k_max = 3;
else if ( inp%dimensions == 2 ) then
i_max = 1; j_max = 3; k_max = 3;
else
i_max = 1; j_max = 1; k_max = 3;
endif
do i = 0, i_max-1
do j = 0, j_max-1
do k = 0, k_max-1
if (inp%dimensions == 1) then
ijk(1) = k;
else if (inp%dimensions == 2) then
ijk(1) = j; ijk(2) = k;
else
ijk(1) = i; ijk(2) = j; ijk(3) = k;
endif
do dims = 1, inp%dimensions
dr_ik(dims) = inp%smoothing_length*(1-ijk(dims))
enddo
s = dot_product(dr_ik, dr_ik)
s = sqrt(s);
dW = gradW(s, inp);
inp%nablaW0(inp%WNx * inp%WNx * i + inp%WNx * j + k + 1, :) = dW * dr_ik
enddo
enddo
enddo
end subroutine
function gradW(s, inp)
type(Parameters), intent(in) :: inp
real(kind=prk) :: s, ss, dW, gradW
ss = s / inp%smoothing_length
if ((ss > 0) .AND. (ss < 1.0)) then
dW = (-3.0 + 2.25 * ss ) * ss
else if((ss < 2.0) .AND. (ss >= 1.0)) then
dW = (2.0 - ss); dW = -0.75 * dW * dW
else
dW=0.0;
endif
if(s > inp%eps) then
gradW = inp%cnst * dW / s / inp%smoothing_length
else
gradW = 0.0
endif
end function gradW
attributes(device) subroutine calculateSmoothingGradientW(r_i, r_k, W)
real(kind=prk) q, s
real(kind=prk), device :: r_i(dimen), r_k(dimen), W(dimen)
s = sqrt(SUM((r_i - r_k) ** 2))
q = s / params%smoothing_length
if (s > params%eps) then
!if s (0;1]
if ( (q > 0) .AND. (q <= 1.0) ) then
W = (r_i - r_k) * params%cnst * ( -3.0 + 2.25 * q ) * q / s / params%smoothing_length
! W = params%cnst * (r_k - r_i) * (4.0 * params%smoothing_length - 3.0 * s)
!if s (1;2] :
else if ( (q > 1.0) .AND. (q <= 2.0) ) then
W = (r_i - r_k) * params%cnst * ( -0.75 * (2.0 - q) ** 2 ) / s / params%smoothing_length
! W = params%cnst * (r_k - r_i) * ( (s - 2.0 * params%smoothing_length) ** 2 ) / s
else
W = 0.0
endif
else
W = 0.0
endif
end subroutine
subroutine cuda_run(t_save, t_end, t_step)
real(kind=prk) :: t_save, t_end, t_step
real(kind=prk) :: total_time, current_time, end_time, save_time
real(kind=prk) :: iter, h_tau
type(dim3) :: blocks, threads
character(len=70) :: fn, tmp
type(Fluids) :: h_all
type(Fluidstemp), device :: minz, maxz
type(Fluidstemp) :: h_minz, h_maxz
type(Parameters) :: outp
type ( cudaEvent ) :: startEvent , stopEvent
real :: time, random, tmpa
integer :: istat
integer, parameter :: outunit=44
integer :: filenum, i
logical :: flag
outp = params
if (outp%dimensions == 1) then
blocks = dim3(outp%Nx / BLOCK_SIZE, 1, 1)
threads = dim3(BLOCK_SIZE, 1, 1)
else if (outp%dimensions == 2) then
blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, 1)
threads = dim3(BLOCK_SIZE, BLOCK_SIZE, 1)
else
blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, outp%Nz / BLOCK_SIZE)
threads = dim3(BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE)
endif
!print *, "run!"
flag = .FALSE.
filenum = 1
current_time = 0.0
save_time = 0.0
iter = 1.0
end_time = t_end
time_steps = 1000000.0
call initial_conditions<<<blocks, threads>>>(allfluid)
!FTheta plot:
h_all = allfluid
! build filename -- i.grd
write(fn,fmt='(a)') 'ftheta.txt'
! open it with a fixed unit number
open(unit=outunit,file=fn, form='formatted')
! write something
do i = 1, outp%totalcells
write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), host_ftheta_k(i, outp)
enddo
! close it
close(outunit)
!Calculate:
do while (current_time < end_time)
write(tmp,fmt='(a,i0,a)') '(',outp%Nx, 'F)'
! do while (iter < 6)
call cuda_boundary_conditions<<<blocks, threads>>>(allfluid)
call cuda_calculate_dt<<<blocks, threads>>>(allfluid, time_steps)
call thrustsort(time_steps,size(time_steps))
istat = cudaMemcpy(tau, time_steps(1), 1, cudaMemcpyDeviceToDevice)
h_tau = tau
call cuda_calculate_forces_sph_predictor<<<blocks, threads>>>(allfluid, allforces)
call cuda_calculate_predictor_sph<<<blocks, threads>>>(allfluid, allforces, allfluidhalf, tau)
call cuda_boundary_conditions<<<blocks, threads>>>(allfluidhalf)
call cuda_calculate_forces_sph_corrector<<<blocks, threads>>>(allfluid, allfluidhalf, allfluidnext)
call cuda_boundary_conditions<<<blocks, threads>>>(allfluidnext)
call cuda_calculate_fluidhalf<<<blocks, threads>>>(allfluid, allfluidnext, allfluidhalf)
call cuda_calculate_flux_tvd<<<blocks, threads>>>(allfluidnext, allfluidhalf, allfluid, allfluxes, boolflags)
! h_all = allfluxes(1)
! print *, "Debug: density sum:", sum(abs(h_all%density))
! print *, "Debug: energy sum:", sum(abs(h_all%energy))
! print *, "Debug: impulse sum:", sum(abs(h_all%U))
call cuda_finalize<<<blocks, threads>>>(allfluid, allfluidnext, allfluxes, boolflags)
if (current_time >= save_time) then
call integral_values<<<blocks,threads>>>(allfluidnext, allfluid)
istat = cudaThreadSynchronize()
h_tau = tau
h_all = allfluidnext
print *, "density sum:", sum(abs(h_all%density))
print *, "energy sum:", sum(abs(h_all%energy))
print *, "impulse sum:", sum(abs(h_all%U))
print *, "current time:", current_time
print *, "save time:", save_time
print *, "dt:", h_tau
print *, "iteration:", iter
random = 0.0
do i = 1, outp%totalcells
if (h_all%density(i) > outp%eps) then
tmpa = outp%gamma * h_all%energy(i) / h_all%density(i)
if (tmpa > outp%eps) then
tmpa = sqrt(tmpa)
tmpa = sqrt(dot_product(h_all%U(i,:),h_all%U(i,:))) / tmpa
endif
if (tmpa > random) then
random = tmpa
endif
endif
enddo
print *, 'Mach: ', random
! call minmax<<<1,1>>>(allfluidnext, minz, maxz)
istat = cudaMemcpy(time_steps, allfluidnext%density, grid**dimen, cudaMemcpyDeviceToDevice)
call thrustsort(time_steps,size(time_steps))
h_minz%density = time_steps(1)
h_maxz%density = time_steps(grid**dimen)
istat = cudaMemcpy(time_steps, allfluidnext%energy, grid**dimen, cudaMemcpyDeviceToDevice)
call thrustsort(time_steps,size(time_steps))
h_minz%energy = time_steps(1)
h_maxz%energy = time_steps(grid**dimen)
do i = 1, dimen
istat = cudaMemcpy(time_steps, allfluidnext%U(:,i), grid**dimen, cudaMemcpyDeviceToDevice)
call thrustsort(time_steps,size(time_steps))
h_minz%U(i) = time_steps(1)
h_maxz%U(i) = time_steps(grid**dimen)
enddo
h_all = allfluidnext
!save data to hard drive:
! build filename -- i.grd
write(fn,fmt='(a,i4,a)') '1d_cuda_rho_', filenum, '.txt'
! open it with a fixed unit number
open(unit=outunit,file=fn, form='formatted')
! write something
do i = 1, outp%totalcells
write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%density(i)
enddo
! close it
close(outunit)
! build filename -- i.grd
write(fn,fmt='(a,i4,a)') '1d_cuda_p_', filenum, '.txt'
! open it with a fixed unit number
open(unit=outunit,file=fn, form='formatted')
! write something
do i = 1, outp%totalcells
write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%energy(i)
enddo
! close it
close(outunit)
! build filename -- i.grd
write(fn,fmt='(a,i4,a)') '1d_cuda_Vx_', filenum, '.txt'
! open it with a fixed unit number
open(unit=outunit,file=fn, form='formatted')
! write something
do i = 1, outp%totalcells
write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%U(i,1)
enddo
! close it
close(outunit)
filenum = filenum + 1
save_time = save_time + t_save
endif
!print *, 'current_time, h_tau', current_time, h_tau
!print *, 'iter', iter
current_time = current_time + h_tau
iter = iter + 1.0
enddo
end subroutine
function host_ftheta_k(indx, inp)
real(kind=prk) :: host_ftheta_k, coord
type(Parameters) :: inp
integer :: indx
coord = inp%smoothing_length / 2.0 + real(indx, kind=prk) * inp%smoothing_length
if (indx < inp%totalcells/3 .AND. indx > inp%totalcells/6) then
! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
host_ftheta_k = inp%ta * coord ** 2 + inp%tb * coord + inp%tc
else
host_ftheta_k = 0.0
endif
! ftheta_k = 0.0
end function host_ftheta_k
attributes(global) subroutine minmax(in, outmin, outmax)
type(Fluids), device :: in
type(Fluidstemp), device :: outmax, outmin
integer i
!find min and max values
outmin%density = minval(in%density(:))
outmin%energy = minval(in%energy(:))
outmax%density = maxval(in%density(:))
outmax%energy = maxval(in%energy(:))
do i = 1, params%dimensions
outmin%U(i) = minval(in%U(:,i))
outmax%U(i) = maxval(in%U(:,i))
enddo
end subroutine
attributes(global) subroutine integral_values(out, in)
type(Fluids), device :: out, in
real(kind=prk) :: tmp(dimen)
integer :: i, j, k, idx
tmp = 0.0
i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
!density:
out%density(idx) = in%density(idx)
!pressure:
if (in%density(idx) > params%eps) then
!velocity:
tmp(1:params%dimensions) = in%U(idx,:) / in%density(idx)
out%U(idx,:) = tmp(1:params%dimensions)
else
tmp = 0.0
out%U(idx,:) = 0.0
endif
out%energy(idx) = (params%gamma - 1.0) * (in%energy(idx) - 0.5 * in%density(idx) * dot_product(tmp, tmp))
if (out%energy(idx) < params%eps) then
out%energy(idx) = 0.0
endif
end subroutine
subroutine fthetacoef(inp)
real(kind=prk) :: minus
real(kind=prk) :: x1, x2, x3, y1, y2, y3
type(Parameters) :: inp
minus = 0.0
!calc x1-x3,y1-y3:
x1 = inp%smoothing_length / 2.0 + (inp%totalcells / 6.0) * inp%smoothing_length
x3 = inp%smoothing_length / 2.0 + (inp%totalcells / 3.0) * inp%smoothing_length
x2 = (x3 + x1) / 2.0
y1 = 0.0
y2 = minus
y3 = 0.0
print *, x1, x2, x3
print *, y1, y2, y3
!calc coef:
inp%ta = (y3 - (x3 * (y2 - y1) + x2 * y1 - x1 * y2) / (x2 - x1) ) / (x3 * (x3 - x1 - x2) + x1 * x2)
inp%tb = (y2 - y1) / (x2 - x1) - inp%ta * (x1 + x2)
inp%tc = (x2 * y1 - x1 * y2) / (x2 - x1) + inp%ta * x1 * x2
end subroutine
end module CUDASphTvd
program GPU_cSPH_TVD
use CUDASphTvd
use cudafor
use thrust
implicit none
type(Parameters) :: inparams
type(test), allocatable, device :: z(:)
real, allocatable, device :: zz(:)
type(test) :: h_z
type(test), device :: d_z
integer :: istat
! ! cuda events for elapsing
! type ( cudaEvent ) :: startEvent , stopEvent
! real :: time, random
! integer :: istat
!
! ! Create events
! istat = cudaEventCreate ( startEvent )
! istat = cudaEventCreate ( stopEvent )
!
!
! istat = cudaEventRecord ( startEvent , 0)
! call thrustsort(gpuData,size(gpuData))
!
! istat = cudaEventRecord ( stopEvent , 0)
! istat = cudaEventSynchronize ( stopEvent )
! istat = cudaEventElapsedTime ( time , startEvent , stopEvent )
!
! print *," Sorted array in:",time," (ms)"
inparams%smoothing_length = 0.5
inparams%dx = 0.5
inparams%courant = 0.0001
inparams%eps = 0.0000001
! host%gamma = 1.5
inparams%gamma = 1.5
inparams%rho_0 = 1
inparams%c_0 = 1
inparams%p_0 = 1
inparams%dimensions = dimen
inparams%Nx = grid
inparams%Ny = abs(dimen > 1) * grid + abs(dimen /= 2)
inparams%Nz = abs(dimen > 2) * grid + abs(dimen /= 3)
inparams%cnst = 10.0 / 7.0 / 3.1415926535897932384626433832795
inparams%totalcells = inparams%Nx * inparams%Ny * inparams%Nz
inparams%neighbourscount = 3 ** inparams%dimensions
if (inparams%dimensions == 1) then
inparams%dim3d = 2; inparams%dim2d = 2
inparams%Wnx = 0; inparams%Wny = 0
else if (inparams%dimensions == 2) then
inparams%dim3d = 2; inparams%dim2d = 0
inparams%Wnx = 3; inparams%Wny = 0
else if (inparams%dimensions == 3) then
inparams%dim3d = 0; inparams%dim2d = 0
inparams%Wnx = 3; inparams%Wny = 3
endif
! print *, "dbg!"
call calculateSmoothingGradientW0(inparams)
! print *, "dbg!"
call fthetacoef(inparams)
print *, inparams%ta, inparams%tb, inparams%tc
params = inparams
allocate(allfluid, allfluidhalf, allfluidnext, allforces)
allocate(allfluxes(dimen))
allocate(time_steps(inparams%totalcells))
allocate(boolflags(inparams%totalcells))
allocate(tau)
print *, "START!"
call cuda_run(0.01, 30.0, 0.0)
print *, "DONE CUDA!"
!Mach : |u|/Cs
end program GPU_cSPH_TVD