! coursework_2011.cuf ! ! FUNCTIONS: ! COURSEW - Entry point of console application. ! ! ! !**************************************************************************** ! ! PROGRAM: GPU_cSPH_TVD ! ! PURPOSE: Use GPU computing for cSPH-TVD method ! !**************************************************************************** module thrust interface thrustsort subroutine sort_int( input,N) bind(C,name="sort_int_wrapper") use iso_c_binding integer(c_int),device:: input(*) integer(c_int),value:: N end subroutine subroutine sort_float( input,N) bind(C,name="sort_float_wrapper") use iso_c_binding real(c_float),device:: input(*) integer(c_int),value:: N end subroutine subroutine sort_double( input,N) bind(C,name="sort_double_wrapper") use iso_c_binding real(c_double),device:: input(*) integer(c_int),value:: N end subroutine end interface end module thrust module fluid use iso_c_binding implicit none integer, parameter :: dimen = 1 integer, parameter :: grid = 512 integer, parameter :: prk = 4 type Fluids ! density = P, energy = e ! U = (U_x, U_y, U_z) ! r = (x, y, z) real(kind=prk) :: density(grid**dimen), energy(grid**dimen) real(kind=prk) :: U(grid**dimen,dimen) !U_x(:), U_y(:), U_z(:) ! speed components real(kind=prk) :: r(grid**dimen,dimen) !x(:), y(:), z(:) end type type test real, allocatable :: p(:) end type type Fluidstemp sequence real(kind=prk) :: z, energy, density real(kind=prk) :: U(dimen) real(kind=prk) :: r(dimen) end type type Parameters real(kind=prk) dx ! center of cells = dx/2 + i*dx integer Nx, Ny, Nz ! dimension number of cells, example: Nx = 10, Ny = 15, Nz = 20 -> [10x15x20] integer N !total number of cells: [10x15x20] = 3000 real(kind=prk) monaghan_multiplier, smoothing_length real(kind=prk) gamma, rho_0, c_0, p_0, cnst, eps, courant real(kind=prk) :: nablaW0(3 ** dimen, dimen) real(kind=prk) :: ta, tb, tc, minus integer totalcells, neighbourscount integer dimensions, dim2d, dim3d !1D, 2D or 3D integer WNx, WNy end type end module fluid module CUDASphTvd use cudafor use cudadevice use thrust use fluid integer, parameter :: BLOCK_SIZE=16 type(Fluids), allocatable, device :: allfluid, allfluidnext, allfluidhalf, allforces !allfluidhalf = (allfluid+allfluidnext) / 2 type(Fluids), allocatable, device :: allfluxes(:) real(kind=prk), allocatable, device :: time_steps(:) logical, allocatable, device :: boolflags(:) type(Parameters), constant :: params real(kind=prk), allocatable, device :: tau real(kind=prk), allocatable, device :: tautemp(:) contains attributes(global) subroutine cuda_calculate_forces_sph_predictor(in, outforce) type(Fluids), device :: in, outforce integer :: i, j, k, idx, idx_k !temporary variables for speed-up: real(kind=prk) :: H_i, H_k, sqrU_i !temporary 'fluid' variables: real(kind=prk) :: i_density, i_energy, theta_k, tmp real(kind=prk) :: Ui(dimen), i_impulse(dimen) real(kind=prk) :: Uk(dimen), tmpW(dimen) i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 sqrU_i = 0.0 !calculate Fi: !get fluid_i characteristics (save in temp): if (in%density(idx) > params%eps) then Ui(1:params%dimensions) = in%U(idx,:) / in%density(idx) else Ui = 0.0 endif i_density = in%density(idx) i_energy = in%energy(idx) i_impulse = 0.0 !calculate H_i sqrU_i = dot_product(Ui, Ui) tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) ) if (tmp > params%eps) then H_i = sqrt(tmp) else H_i = 0.0 endif !print *, "H_i", tmp !calculate sum(W_{ik}) and H_k and sum(Ui+Uk): H_k = 0.0 theta_k = 0.0 i_energy= 0.0 !sum_{k} {Ui +Uk} = sum_{k} {Ui} + sum_{k} {Uk} = k * Ui + sum_{k} {Uk} : do i_k = i - 1, i + 1 if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then do j_k = j - 1, j + 1 if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then do k_k = k - 1, k + 1 if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1 if (in%density(idx_k) > params%eps) then Uk(1:params%dimensions) = in%U(idx_k,:) / in%density(idx_k) else Uk = 0.0 endif sqrU_i = dot_product(Uk, Uk) tmp = 2.0 * ( (params%gamma - 1.0) * (in%energy(idx_k) - (in%density(idx_k) * sqrU_i) / 2.0)) if (tmp > params%eps) then H_k = sqrt(tmp) else H_k = 0.0 endif tmpW(1:params%dimensions) = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:) if (H_i > params%eps) then i_impulse = i_impulse - tmpW * H_k - tmpW * i_density / H_i * ftheta_k(idx_k) i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0 - ftheta_k(idx_k) * dot_product(in%U(idx,:), tmpW) / H_i else i_impulse = i_impulse - tmpW * H_k i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0 endif endif enddo endif enddo endif enddo !calculate predictor forces: i_impulse = H_i * i_impulse i_energy = H_i * i_energy outforce%density(idx) = 0.0 if (i_density > params%eps) then !print *, "i_impulse, i_energy", i_impulse, i_energy outforce%U(idx,:) = i_impulse(1:params%dimensions) outforce%energy(idx) = i_energy else outforce%U(idx,:) = 0.0 outforce%energy(idx) = 0.0 endif end subroutine attributes(device) function ftheta_k(indx) real(kind=prk) :: ftheta_k, coord integer :: indx coord = params%smoothing_length / 2.0 + real(indx, kind=prk) * params%smoothing_length if (indx < params%totalcells/3 .AND. indx > params%totalcells/6) then ! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) ) ftheta_k = params%ta * coord ** 2 + params%tb * coord + params%tc else ftheta_k = 0.0 endif ftheta_k = 0.0 end function ftheta_k attributes(global) subroutine cuda_calculate_dt(in, outdt) type(Fluids), device :: in real(kind=prk), dimension(:), device :: outdt real(kind=prk) :: Cs integer :: idx, i, j, k i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 outdt(idx) = 10000000.0 if (in%density(idx) > params%eps) then Cs = params%gamma * (params%gamma - 1.0) * (in%energy(idx) - in%density(idx)*dot_product(in%U(idx,:) / in%density(idx),in%U(idx,:) / in%density(idx)) / 2.0) / in%density(idx) Cs = (abs(Cs > params%eps) * sqrt(abs(Cs))) outdt(idx) = params%courant * min(params%smoothing_length / (2.0 * maxval(abs(in%U(idx,:) / in%density(idx)))), params%smoothing_length / (maxval(abs(in%U(idx,:) / in%density(idx))) + Cs)) endif end subroutine cuda_calculate_dt attributes(global) subroutine cuda_calculate_predictor_sph(in, inforce, outhalf, tau) integer idx, i, j, k !temporary 'fluid' variables: real(kind=prk), device :: tau type(Fluids), device :: in, inforce, outhalf !formula 16: i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 !calculate H_i: idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 !calculate predictor based on calculated forces inb4: outhalf%density(idx) = in%density(idx) + (tau / 2.0) * inforce%density(idx) if (in%density(idx) > params%eps) then outhalf%U(idx,:) = in%U(idx,:) + tau * inforce%U(idx,:) / 2.0 outhalf%energy(idx) = in%energy(idx) + tau * inforce%energy(idx) / 2.0 outhalf%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / in%density(idx) + outhalf%U(idx,:) / outhalf%density(idx) ) / 4.0 else outhalf%U(idx,:) = 0.0 outhalf%energy(idx) = 0.0 outhalf%r(idx,:) = in%r(idx,:) endif end subroutine cuda_calculate_predictor_sph attributes(global) subroutine cuda_calculate_forces_sph_corrector(in, inhalf, outnext) integer idx, idx_k, i, j, k, i_k, j_k, k_k !temporary variables for speed-up: real(kind=prk) H_i, H_k, sqrU_i !temporary 'fluid' variables: real(kind=prk) i_density, i_energy, theta_k, tmp real(kind=prk) :: Ui(dimen), ri(dimen), i_impulse(dimen) real(kind=prk) :: Uk(dimen), rk(dimen), Wr(dimen), tmpW(dimen) type(Fluids), device :: in, inhalf, outnext !formula 16: i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 !calculate H_i: sqrU_i = 0.0 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 !calculate Fi: !get fluid_i characteristics (save in temp): if (inhalf%density(idx) > params%eps) then Ui(1:params%dimensions) = inhalf%U(idx,:) / inhalf%density(idx) else Ui = 0.0 endif ri(1:params%dimensions) = inhalf%r(idx,:) i_density = inhalf%density(idx) i_energy = inhalf%energy(idx) i_impulse = 0.0 !calculate H_i sqrU_i = dot_product(Ui, Ui) tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) ) if (tmp > params%eps) then H_i = sqrt(tmp) else H_i = 0.0 endif !calculate sum(W_{ik}) and H_k and sum(Ui+Uk): H_k = 0.0 theta_k = 0.0 i_energy= 0.0 do i_k = i - 1, i + 1 if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then do j_k = j - 1, j + 1 if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then do k_k = k - 1, k + 1 if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1 if (inhalf%density(idx_k) > params%eps) then Uk(1:params%dimensions) = inhalf%U(idx_k,:) / inhalf%density(idx_k) else Uk = 0.0 endif sqrU_i = dot_product(Uk, Uk) tmp = 2.0 * ( (params%gamma - 1.0) * (inhalf%energy(idx_k) - (inhalf%density(idx_k) * sqrU_i) / 2.0) ) if (tmp > params%eps) then H_k = sqrt(tmp) else H_k = 0.0 endif rk(1:params%dimensions) = inhalf%r(idx_k,:) call calculateSmoothingGradientW(ri(1:params%dimensions), rk(1:params%dimensions), Wr(1:params%dimensions)) tmpW = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:) if (H_i > params%eps) then i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k - tmpW * i_density / H_i * ftheta_k(idx_k) i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0 - ftheta_k(idx_k) * dot_product(inhalf%U(idx,:), tmpW) / H_i else i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0 endif endif enddo endif enddo endif enddo !calculate corrector forces: i_impulse(1:params%dimensions) = H_i * i_impulse(1:params%dimensions) i_energy = H_i * i_energy ! call syncthreads() outnext%density(idx) = in%density(idx) + 0.0 ! + 0.0 * (tau / 2.0) if (i_density > params%eps) then outnext%U(idx,:) = in%U(idx,:) + tau * i_impulse(1:params%dimensions) outnext%energy(idx) = in%energy(idx) + tau * i_energy outnext%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / i_density + outnext%U(idx,:) / i_density ) / 2.0 else outnext%U(idx,:) = 0.0 outnext%energy(idx) = 0.0 outnext%r(idx,:) = in%r(idx,:) endif !calculate q(n+1/2): ! inhalf%density(idx) = ( in%density(idx) + outnext%density(idx) ) / 2.0 ! inhalf%energy(idx) = ( in%energy(idx) + outnext%energy(idx) ) / 2.0 ! inhalf%r(idx,:) = ( in%r(idx,:) + outnext%r(idx,:) ) / 2.0 ! inhalf%U(idx,:) = ( in%U(idx,:) + outnext%U(idx,:) ) / 2.0 end subroutine cuda_calculate_forces_sph_corrector attributes(global) subroutine cuda_calculate_fluidhalf(in, innext, outhalf) type(Fluids), device :: in, outhalf, innext integer :: i, j, k, idx !calculate q(n+1/2): i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 outhalf%density(idx) = ( in%density(idx) + innext%density(idx) ) / 2.0 outhalf%energy(idx) = ( in%energy(idx) + innext%energy(idx) ) / 2.0 outhalf%r(idx,:) = ( in%r(idx,:) + innext%r(idx,:) ) / 2.0 outhalf%U(idx,:) = ( in%U(idx,:) + innext%U(idx,:) ) / 2.0 end subroutine cuda_calculate_fluidhalf attributes(global) subroutine cuda_calculate_flux_tvd(in, inhalf, outq0, outfluxes, outflags) integer idx, idx_k, i, j, k, i_k, j_k, k_k, dims, dim2dim, dim3dim integer, dimension(:) :: offset(3), ijk(3) logical, dimension(:), device :: outflags type(Fluids), device :: in, inhalf, outq0 !in = n+1 from corrector !outq0 = n type(Fluids), device :: outfluxes(dimen) type(Fluidstemp) :: neighbrs(3 + (dimen - 1) * 2), fluidhalf(4), ql, qr, Fl, Fr real(kind=prk) :: dr1, dr2, dr3, L, pl, Cl real(kind=prk) :: Vrl(dimen), Vrr(dimen) real(kind=prk) :: aa ! print *, "calculate_flux_tvd" ! neighbours = 3 + (params%dimensions - 1) * 2 ! print *, "calculate_flux_tvd debug" neighbrs%density = 0.0 neighbrs%energy = 0.0 fluidhalf%density = 0.0 fluidhalf%energy = 0.0 do i = 1, params%dimensions neighbrs(:)%r(i) = 0.0 neighbrs(:)%U(i) = 0.0 fluidhalf(:)%r(i) = 0.0 fluidhalf(:)%U(i) = 0.0 enddo offset(1) = 1 offset(2) = params%Nx offset(3) = params%Nx * params%Ny dim2dim = abs(params%dimensions == 1) * 1 dim3dim = abs(params%dimensions < 3) * 1 i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 do dims = 1, params%dimensions outfluxes(dims)%density(idx) = 0.0 outfluxes(dims)%energy(idx) = 0.0 outfluxes(dims)%U(idx,:) = 0.0 enddo !for each particle do: if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-2 + dim3dim) ) then if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-2 + dim2dim) ) then if ( (k >= 2) .AND. (k <= params%Nx-2) ) then ijk(1) = k; ijk(2) = j; ijk(3) = i; !get center cell (we will calculate fluxes relatively this cell): ! print *, "idx, ", idx ! print *, "ijk, ", ijk !take all neighbours (1d: 3, 2d: 5(cross), 3d: 7(3d-cross)): if ( (k - 1 >= 0) .AND. (k - 1 < params%Nx) ) then neighbrs(1)%density = in%density(idx-1) neighbrs(1)%energy = in%energy(idx-1) neighbrs(1)%r(:) = in%r(idx-1,:) neighbrs(1)%U(:) = in%U(idx-1,:) endif neighbrs(2)%density = in%density(idx) neighbrs(2)%energy = in%energy(idx) neighbrs(2)%r(:) = in%r(idx,:) neighbrs(2)%U(:) = in%U(idx,:) if ( (k + 1 >= 0) .AND. (k + 1 < params%Nx) ) then neighbrs(3)%density = in%density(idx+1) neighbrs(3)%energy = in%energy(idx+1) neighbrs(3)%r(:) = in%r(idx+1,:) neighbrs(3)%U(:) = in%U(idx+1,:) endif !add neighbours if dim > 1: if ( params%dimensions > 1 ) then if ( (j - 1 >= 0) .AND. (j - 1 < params%Ny) ) then idx_k = idx - params%Nx !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1 neighbrs(4)%density = in%density(idx_k) neighbrs(4)%energy = in%energy(idx_k) neighbrs(4)%r(:) = in%r(idx_k,:) neighbrs(4)%U(:) = in%U(idx_k,:) endif if ( (j + 1 >= 0) .AND. (j + 1 < params%Ny) ) then idx_k = idx + params%Nx neighbrs(5)%density = in%density(idx_k) neighbrs(5)%energy = in%energy(idx_k) neighbrs(5)%r(:) = in%r(idx_k,:) neighbrs(5)%U(:) = in%U(idx_k,:) endif endif !add neighbours if dim > 2: if ( params%dimensions > 2 ) then if ( (i - 1 >= 0) .AND. (i - 1 < params%Nz) ) then idx_k = idx - params%Nx * params%Ny !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1 neighbrs(6)%density = in%density(idx_k) neighbrs(6)%energy = in%energy(idx_k) neighbrs(6)%r(:) = in%r(idx_k,:) neighbrs(6)%U(:) = in%U(idx_k,:) endif if ( (i + 1 >= 0) .AND. (i + 1 < params%Nz) ) then idx_k = idx + params%Nx * params%Ny neighbrs(7)%density = in%density(idx_k) neighbrs(7)%energy = in%energy(idx_k) neighbrs(7)%r(:) = in%r(idx_k,:) neighbrs(7)%U(:) = in%U(idx_k,:) endif endif !Okay, now lets decide if we need to calculate fluxes (check density): !take components based on current dimension: if (maxval(neighbrs%density) > params%eps) then outflags(idx) = .TRUE. do dims = 1, params%dimensions do i_k = 0, 3 fluidhalf(i_k+1)%density = inhalf%density(idx + offset(dims) * (i_k - 2)) fluidhalf(i_k+1)%energy = inhalf%energy(idx + offset(dims) * (i_k - 2)) fluidhalf(i_k+1)%U(:) = inhalf%U(idx + offset(dims) * (i_k - 2),:) fluidhalf(i_k+1)%r(:) = inhalf%r(idx + offset(dims) * (i_k - 2),:) enddo !calc ql: dr1 = fluidhalf(2)%r(dims) - fluidhalf(1)%r(dims) dr2 = fluidhalf(3)%r(dims) - fluidhalf(2)%r(dims) dr3 = fluidhalf(4)%r(dims) - fluidhalf(3)%r(dims) if (fluidhalf(2)%density > params%eps) then drx = (params%smoothing_length / 2.0 - (fluidhalf(2)%r(dims) - (params%dx/2 + (ijk(dims) - 1) * params%dx)) / 2.0) call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(2)%density - fluidhalf(1)%density) / dr1, L) ql%density = fluidhalf(2)%density + drx * L call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(2)%energy - fluidhalf(1)%energy) / dr1, L) ql%energy = fluidhalf(2)%energy + drx * L do j_k = 1, params%dimensions call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(2)%U(j_k) - fluidhalf(1)%U(j_k)) / dr1, L) ql%U(j_k) = fluidhalf(2)%U(j_k) + drx * L enddo ! Vrl = ql%U(1,:) !Vrl(1) = Vxl, Vrl(2) = Vyl, Vrl(3) = Vzl if (ql%density > params%eps) then Vrl = ql%U(:) / ql%density pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) Cl = sqrt(params%gamma * pl / ql%density) else Vrl = 0.0 pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) Cl = 0.0 endif Fl%density = ql%U(dims) Fl%U(:) = ql%U(:) * Vrl(dims) Fl%energy = ql%energy * Vrl(dims) else ql%density = 0.0 ql%energy = 0.0 ql%U(:) = 0.0 Fl%density = 0.0 Fl%energy = 0.0 Fl%U(:) = 0.0 Cl = 0.0 Vrl = 0.0 endif ! print *, "t_debug4" !calc qr: if (fluidhalf(3)%density > params%eps) then drx = (params%smoothing_length / 2.0 + (fluidhalf(3)%r(dims) - (params%dx / 2.0 + ijk(dims) * params%dx)) / 2.0) call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(4)%density - fluidhalf(3)%density) / dr3, L) qr%density = fluidhalf(3)%density - drx * L call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(4)%energy - fluidhalf(3)%energy) / dr3, L) qr%energy = fluidhalf(3)%energy - drx * L do j_k = 1, params%dimensions call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(4)%U(j_k) - fluidhalf(3)%U(j_k)) / dr3, L) qr%U(j_k) = fluidhalf(3)%U(j_k) - drx * L enddo ! Vrr = qr%U(1,:) !Vrr(1) = Vxr, Vrr(2) = Vyr, Vrr(3) = Vzr if (qr%density > params%eps) then Vrr = qr%U(:) / qr%density pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) Cr = sqrt(params%gamma * pr / qr%density) else Cr = 0.0 Vrr = 0.0 pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) endif Fr%density = qr%U(dims) Fr%U(:) = qr%U(:) * Vrr(dims) Fr%energy = qr%energy * Vrr(dims) else qr%density = 0.0 qr%energy = 0.0 qr%U(:) = 0.0 Fr%density = 0.0 Fr%energy = 0.0 Fr%U(:) = 0.0 Cr = 0.0 Vrr = 0.0 endif !calc Fx: if ((Cl > params%eps) .OR. (Cr > params%eps)) then Ll = min(Vrl(dims) - Cl, Vrr(dims) - Cr) Lr = max(Vrr(dims) + Cr, Vrl(dims) + Cl) bb = max(abs(Ll), abs(Lr)) outfluxes(dims)%density(idx) = ( Fr%density + Fl%density + bb * (ql%density - qr%density) ) / 2.0 outfluxes(dims)%energy(idx) = ( Fr%energy + Fl%energy + bb * (ql%energy - qr%energy) ) / 2.0 outfluxes(dims)%U(idx,:) = ( Fr%U(:) + Fl%U(:) + bb * (ql%U(:) - qr%U(:)) ) / 2.0 else outfluxes(dims)%density(idx) = 0.0 outfluxes(dims)%energy(idx) = 0.0 outfluxes(dims)%U(idx,:) = 0.0 endif enddo else outflags(idx) = .FALSE. do dims = 1, params%dimensions outfluxes(dims)%density(idx) = 0.0 outfluxes(dims)%energy(idx) = 0.0 outfluxes(dims)%U(idx,:) = 0.0 enddo endif ! outfluxes(:)%density(idx) = 0.0 endif endif endif end subroutine cuda_calculate_flux_tvd attributes(global) subroutine cuda_finalize(outq0, in, influxes, inflags) type(Fluids), device :: in, outq0 !in = n+1 from corrector !outq0 = n type(Fluids), device :: influxes(dimen) type(Fluidstemp) :: fluidhalf(dimen) integer, dimension(:) :: offset(3) logical, dimension(:), device :: inflags integer :: dims, i, j, k, idx offset(1) = 1 offset(2) = params%Nx offset(3) = params%Nx * params%Ny i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-3 + params%dim3d) ) then if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-3 + params%dim2d) ) then if ( (k >= 2) .AND. (k <= params%Nx-3) ) then if (inflags(idx)) then !calculate all neighbour fluxes (idx + offset(dims)): do dims = 1, params%dimensions fluidhalf(dims)%density = influxes(dims)%density(idx + offset(dims)) fluidhalf(dims)%energy = influxes(dims)%energy(idx + offset(dims)) fluidhalf(dims)%U(:) = influxes(dims)%U(idx + offset(dims),:) enddo !calculate final values: outq0%density(idx) = in%density(idx) + tau / params%smoothing_length * (sum(influxes(:)%density(idx)) - sum(fluidhalf(1:params%dimensions)%density)) outq0%energy(idx) = in%energy(idx) + tau / params%smoothing_length * (sum(influxes(:)%energy(idx)) - sum(fluidhalf(1:params%dimensions)%energy)) do dims = 1, params%dimensions outq0%U(idx,dims) = in%U(idx,dims) + tau / params%smoothing_length * (sum(influxes(:)%U(idx,dims)) - sum(fluidhalf(1:params%dimensions)%U(dims))) enddo else outq0%density(idx) = in%density(idx) outq0%energy(idx) = in%energy(idx) outq0%U(idx,:) = in%U(idx,:) endif endif endif endif end subroutine cuda_finalize attributes(device) subroutine minmod(a, b, out) real(kind=prk), device :: a, b, out out = ( 1.0 / 2.0) * (sign(1.0, a) + sign(1.0, b)) * min(abs(a), abs(b)) end subroutine minmod subroutine cuda_setupSystem(inparams) end subroutine attributes(global) subroutine cuda_boundary_conditions(ptr) integer j, k, idx, idx_j, idx_k type(Fluids), device :: ptr integer ti, tj, tk, tidx ti = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 tj = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 tk = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 ! idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 if (params%dimensions == 1) then ptr%density(1) = ptr%density(3) ptr%density(2) = ptr%density(1) ptr%density(params%Nx) = ptr%density(params%Nx - 2) ptr%density(params%Nx-1) = ptr%density(params%Nx) ptr%energy(1) = ptr%energy(3) ptr%energy(2) = ptr%energy(1) ptr%energy(params%Nx) = ptr%energy(params%Nx - 2) ptr%energy(params%Nx-1) = ptr%energy(params%Nx) ptr%U(1,1) = ptr%U(3,1) ptr%U(2,1) = ptr%U(1,1) ptr%U(params%Nx,1) = ptr%U(params%Nx - 2,1) ptr%U(params%Nx-1,1) = ptr%U(params%Nx,1) endif if (params%dimensions == 2) then ! copy x: if ( (k >= 0) .AND. (k <= params%Nx-1) ) then ! do k = 0, params%Nx-1 idx = k + 1 idx_j = params%Nx + idx idx_k = 2 * params%Nx + idx ptr%density(idx) = ptr%density(idx_k) ptr%density(idx_j) = ptr%density(idx) ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx) ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx) ptr%energy(idx) = ptr%energy(idx_k) ptr%energy(idx_j) = ptr%energy(idx) ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx) ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx) ptr%U(idx,1) = ptr%U(idx_k,1) ptr%U(idx_j,1) = ptr%U(idx,1) ptr%U(idx,2) = ptr%U(idx_k,2) ptr%U(idx_j,2) = ptr%U(idx,2) ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1) ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1) ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2) ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2) endif ! copy y: if ( (j >= 0) .AND. (j <= params%Ny-1) ) then ! do k = 0, params%Ny-1 idx = params%Nx * j + 1 idx_j = idx + 1 idx_k = idx + 2 ptr%density(idx) = ptr%density(idx_k) ptr%density(idx_j) = ptr%density(idx) ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3) ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1) ptr%energy(idx) = ptr%energy(idx_k) ptr%energy(idx_j) = ptr%energy(idx) ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3) ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1) ptr%U(idx,1) = ptr%U(idx_k,1) ptr%U(idx_j,1) = ptr%U(idx,1) ptr%U(idx,2) = ptr%U(idx_k,2) ptr%U(idx_j,2) = ptr%U(idx,2) ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1) ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1) ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2) ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2) endif endif if (params%dimensions == 3) then !copy (for [y x z]) if ( (j >= 0) .AND. (j <= params%Nz-1) ) then if ( (k >= 0) .AND. (k <= params%Ny-1) ) then idx = params%Nx * params%Ny * j + params%Nx * k + 1 idx_j = idx + 1 idx_k = idx + 2 ptr%density(idx) = ptr%density(idx_k) ptr%density(idx_j) = ptr%density(idx) ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3) ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1) ptr%energy(idx) = ptr%energy(idx_k) ptr%energy(idx_j) = ptr%energy(idx) ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3) ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1) ptr%U(idx,1) = ptr%U(idx_k,1) ptr%U(idx_j,1) = ptr%U(idx,1) ptr%U(idx,2) = ptr%U(idx_k,2) ptr%U(idx_j,2) = ptr%U(idx,2) ptr%U(idx,3) = ptr%U(idx_k,3) ptr%U(idx_j,3) = ptr%U(idx,3) ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1) ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1) ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2) ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2) ptr%U(params%Nx + idx - 1,3) = ptr%U(params%Nx + idx - 3,3) ptr%U(params%Nx + idx - 2,3) = ptr%U(params%Nx + idx - 1,3) endif endif !copy (for [x x z]) if ( (j >= 0) .AND. (j <= params%Nz-1) ) then if ( (k >= 0) .AND. (k <= params%Nx-1) ) then idx = params%Nx * params%Ny * j + k + 1 idx_j = idx + params%Nx idx_k = idx + 2 * params%Nx ptr%density(idx) = ptr%density(idx_k) ptr%density(idx_j) = ptr%density(idx) ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx) ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx) ptr%energy(idx) = ptr%energy(idx_k) ptr%energy(idx_j) = ptr%energy(idx) ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx) ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx) ptr%U(idx,1) = ptr%U(idx_k,1) ptr%U(idx_j,1) = ptr%U(idx,1) ptr%U(idx,2) = ptr%U(idx_k,2) ptr%U(idx_j,2) = ptr%U(idx,2) ptr%U(idx,3) = ptr%U(idx_k,3) ptr%U(idx_j,3) = ptr%U(idx,3) ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1) ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1) ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2) ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2) ptr%U((params%Ny - 1) * params%Nx + idx,3) = ptr%U((params%Ny - 3) * params%Nx + idx,3) ptr%U((params%Ny - 2) * params%Nx + idx,3) = ptr%U((params%Ny - 1) * params%Nx + idx,3) endif endif !copy (for [x x y]) if ( (j >= 0) .AND. (j <= params%Ny-1) ) then if ( (k >= 0) .AND. (k <= params%Nx-1) ) then idx = params%Nx * j + k + 1 idx_j = idx + params%Nx * params%Ny idx_k = idx + params%Nx * params%Ny * 2 ptr%density(idx) = ptr%density(idx_k) ptr%density(idx_j) = ptr%density(idx) ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 3) + idx) ptr%density(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx) ptr%energy(idx) = ptr%energy(idx_k) ptr%energy(idx_j) = ptr%energy(idx) ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 3) + idx) ptr%energy(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx) ptr%U(idx,1) = ptr%U(idx_k,1) ptr%U(idx_j,1) = ptr%U(idx,1) ptr%U(idx,2) = ptr%U(idx_k,2) ptr%U(idx_j,2) = ptr%U(idx,2) ptr%U(idx,3) = ptr%U(idx_k,3) ptr%U(idx_j,3) = ptr%U(idx,3) ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,1) ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1) ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,2) ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2) ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,3) ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3) endif endif endif end subroutine attributes(global) subroutine initial_conditions(allf) type(Fluids), device :: allf integer :: i, j, k, dims, ijk(3), N i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 N = params%Nx idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 ijk(1) = k ijk(2) = j ijk(3) = i ! print *, "idx", idx ! print *, "ijk", i, j, k if (idx > (params%totalcells * 0.51) .AND. idx < (params%totalcells * 0.61)) then ! wave allfluid%density(idx) = 11.3 allfluid%U(idx,:) = -22.2 allfluid%energy(idx) = 45.0 ! if (idx < (params%totalcells * 0.4)) then else if (idx > (params%totalcells * 0.39) .AND. idx < (params%totalcells * 0.49)) then ! wave allfluid%density(idx) = 11.3 allfluid%U(idx,:) = 22.2 allfluid%energy(idx) = 45.0 else allfluid%density(idx) = 11.3 allfluid%U(idx,:) = 0.0 allfluid%energy(idx) = 0.0 endif ! else ! allfluid%density(idx) = 20.0 ! allfluid%U(idx,:) = 0.0 ! allfluid%energy(idx) = 0.0 ! endif do dims = 1, params%dimensions allf%r(idx,dims) = params%dx / 2.0 + ijk(dims) * params%dx enddo end subroutine subroutine calculateSmoothingGradientW0(inp) type(Parameters) :: inp real(kind=prk) dW, s real(kind=prk) dr_ik(inp%dimensions) integer i, j, k, i_max, j_max, k_max, dims, ijk(inp%dimensions) if ( inp%dimensions == 3 ) then i_max = 3; j_max = 3; k_max = 3; else if ( inp%dimensions == 2 ) then i_max = 1; j_max = 3; k_max = 3; else i_max = 1; j_max = 1; k_max = 3; endif do i = 0, i_max-1 do j = 0, j_max-1 do k = 0, k_max-1 if (inp%dimensions == 1) then ijk(1) = k; else if (inp%dimensions == 2) then ijk(1) = j; ijk(2) = k; else ijk(1) = i; ijk(2) = j; ijk(3) = k; endif do dims = 1, inp%dimensions dr_ik(dims) = inp%smoothing_length*(1-ijk(dims)) enddo s = dot_product(dr_ik, dr_ik) s = sqrt(s); dW = gradW(s, inp); inp%nablaW0(inp%WNx * inp%WNx * i + inp%WNx * j + k + 1, :) = dW * dr_ik enddo enddo enddo end subroutine function gradW(s, inp) type(Parameters), intent(in) :: inp real(kind=prk) :: s, ss, dW, gradW ss = s / inp%smoothing_length if ((ss > 0) .AND. (ss < 1.0)) then dW = (-3.0 + 2.25 * ss ) * ss else if((ss < 2.0) .AND. (ss >= 1.0)) then dW = (2.0 - ss); dW = -0.75 * dW * dW else dW=0.0; endif if(s > inp%eps) then gradW = inp%cnst * dW / s / inp%smoothing_length else gradW = 0.0 endif end function gradW attributes(device) subroutine calculateSmoothingGradientW(r_i, r_k, W) real(kind=prk) q, s real(kind=prk), device :: r_i(dimen), r_k(dimen), W(dimen) s = sqrt(SUM((r_i - r_k) ** 2)) q = s / params%smoothing_length if (s > params%eps) then !if s (0;1] if ( (q > 0) .AND. (q <= 1.0) ) then W = (r_i - r_k) * params%cnst * ( -3.0 + 2.25 * q ) * q / s / params%smoothing_length ! W = params%cnst * (r_k - r_i) * (4.0 * params%smoothing_length - 3.0 * s) !if s (1;2] : else if ( (q > 1.0) .AND. (q <= 2.0) ) then W = (r_i - r_k) * params%cnst * ( -0.75 * (2.0 - q) ** 2 ) / s / params%smoothing_length ! W = params%cnst * (r_k - r_i) * ( (s - 2.0 * params%smoothing_length) ** 2 ) / s else W = 0.0 endif else W = 0.0 endif end subroutine subroutine cuda_run(t_save, t_end, t_step) real(kind=prk) :: t_save, t_end, t_step real(kind=prk) :: total_time, current_time, end_time, save_time real(kind=prk) :: iter, h_tau type(dim3) :: blocks, threads character(len=70) :: fn, tmp type(Fluids) :: h_all type(Fluidstemp), device :: minz, maxz type(Fluidstemp) :: h_minz, h_maxz type(Parameters) :: outp type ( cudaEvent ) :: startEvent , stopEvent real :: time, random, tmpa integer :: istat integer, parameter :: outunit=44 integer :: filenum, i logical :: flag outp = params if (outp%dimensions == 1) then blocks = dim3(outp%Nx / BLOCK_SIZE, 1, 1) threads = dim3(BLOCK_SIZE, 1, 1) else if (outp%dimensions == 2) then blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, 1) threads = dim3(BLOCK_SIZE, BLOCK_SIZE, 1) else blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, outp%Nz / BLOCK_SIZE) threads = dim3(BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE) endif !print *, "run!" flag = .FALSE. filenum = 1 current_time = 0.0 save_time = 0.0 iter = 1.0 end_time = t_end time_steps = 1000000.0 call initial_conditions<<>>(allfluid) !FTheta plot: h_all = allfluid ! build filename -- i.grd write(fn,fmt='(a)') 'ftheta.txt' ! open it with a fixed unit number open(unit=outunit,file=fn, form='formatted') ! write something do i = 1, outp%totalcells write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), host_ftheta_k(i, outp) enddo ! close it close(outunit) !Calculate: do while (current_time < end_time) write(tmp,fmt='(a,i0,a)') '(',outp%Nx, 'F)' ! do while (iter < 6) call cuda_boundary_conditions<<>>(allfluid) call cuda_calculate_dt<<>>(allfluid, time_steps) call thrustsort(time_steps,size(time_steps)) istat = cudaMemcpy(tau, time_steps(1), 1, cudaMemcpyDeviceToDevice) h_tau = tau call cuda_calculate_forces_sph_predictor<<>>(allfluid, allforces) call cuda_calculate_predictor_sph<<>>(allfluid, allforces, allfluidhalf, tau) call cuda_boundary_conditions<<>>(allfluidhalf) call cuda_calculate_forces_sph_corrector<<>>(allfluid, allfluidhalf, allfluidnext) call cuda_boundary_conditions<<>>(allfluidnext) call cuda_calculate_fluidhalf<<>>(allfluid, allfluidnext, allfluidhalf) call cuda_calculate_flux_tvd<<>>(allfluidnext, allfluidhalf, allfluid, allfluxes, boolflags) ! h_all = allfluxes(1) ! print *, "Debug: density sum:", sum(abs(h_all%density)) ! print *, "Debug: energy sum:", sum(abs(h_all%energy)) ! print *, "Debug: impulse sum:", sum(abs(h_all%U)) call cuda_finalize<<>>(allfluid, allfluidnext, allfluxes, boolflags) if (current_time >= save_time) then call integral_values<<>>(allfluidnext, allfluid) istat = cudaThreadSynchronize() h_tau = tau h_all = allfluidnext print *, "density sum:", sum(abs(h_all%density)) print *, "energy sum:", sum(abs(h_all%energy)) print *, "impulse sum:", sum(abs(h_all%U)) print *, "current time:", current_time print *, "save time:", save_time print *, "dt:", h_tau print *, "iteration:", iter random = 0.0 do i = 1, outp%totalcells if (h_all%density(i) > outp%eps) then tmpa = outp%gamma * h_all%energy(i) / h_all%density(i) if (tmpa > outp%eps) then tmpa = sqrt(tmpa) tmpa = sqrt(dot_product(h_all%U(i,:),h_all%U(i,:))) / tmpa endif if (tmpa > random) then random = tmpa endif endif enddo print *, 'Mach: ', random ! call minmax<<<1,1>>>(allfluidnext, minz, maxz) istat = cudaMemcpy(time_steps, allfluidnext%density, grid**dimen, cudaMemcpyDeviceToDevice) call thrustsort(time_steps,size(time_steps)) h_minz%density = time_steps(1) h_maxz%density = time_steps(grid**dimen) istat = cudaMemcpy(time_steps, allfluidnext%energy, grid**dimen, cudaMemcpyDeviceToDevice) call thrustsort(time_steps,size(time_steps)) h_minz%energy = time_steps(1) h_maxz%energy = time_steps(grid**dimen) do i = 1, dimen istat = cudaMemcpy(time_steps, allfluidnext%U(:,i), grid**dimen, cudaMemcpyDeviceToDevice) call thrustsort(time_steps,size(time_steps)) h_minz%U(i) = time_steps(1) h_maxz%U(i) = time_steps(grid**dimen) enddo h_all = allfluidnext !save data to hard drive: ! build filename -- i.grd write(fn,fmt='(a,i4,a)') '1d_cuda_rho_', filenum, '.txt' ! open it with a fixed unit number open(unit=outunit,file=fn, form='formatted') ! write something do i = 1, outp%totalcells write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%density(i) enddo ! close it close(outunit) ! build filename -- i.grd write(fn,fmt='(a,i4,a)') '1d_cuda_p_', filenum, '.txt' ! open it with a fixed unit number open(unit=outunit,file=fn, form='formatted') ! write something do i = 1, outp%totalcells write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%energy(i) enddo ! close it close(outunit) ! build filename -- i.grd write(fn,fmt='(a,i4,a)') '1d_cuda_Vx_', filenum, '.txt' ! open it with a fixed unit number open(unit=outunit,file=fn, form='formatted') ! write something do i = 1, outp%totalcells write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%U(i,1) enddo ! close it close(outunit) filenum = filenum + 1 save_time = save_time + t_save endif !print *, 'current_time, h_tau', current_time, h_tau !print *, 'iter', iter current_time = current_time + h_tau iter = iter + 1.0 enddo end subroutine function host_ftheta_k(indx, inp) real(kind=prk) :: host_ftheta_k, coord type(Parameters) :: inp integer :: indx coord = inp%smoothing_length / 2.0 + real(indx, kind=prk) * inp%smoothing_length if (indx < inp%totalcells/3 .AND. indx > inp%totalcells/6) then ! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) ) host_ftheta_k = inp%ta * coord ** 2 + inp%tb * coord + inp%tc else host_ftheta_k = 0.0 endif ! ftheta_k = 0.0 end function host_ftheta_k attributes(global) subroutine minmax(in, outmin, outmax) type(Fluids), device :: in type(Fluidstemp), device :: outmax, outmin integer i !find min and max values outmin%density = minval(in%density(:)) outmin%energy = minval(in%energy(:)) outmax%density = maxval(in%density(:)) outmax%energy = maxval(in%energy(:)) do i = 1, params%dimensions outmin%U(i) = minval(in%U(:,i)) outmax%U(i) = maxval(in%U(:,i)) enddo end subroutine attributes(global) subroutine integral_values(out, in) type(Fluids), device :: out, in real(kind=prk) :: tmp(dimen) integer :: i, j, k, idx tmp = 0.0 i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1 j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1 k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1 idx = params%Ny * params%Nx * i + params%Nx * j + k + 1 !density: out%density(idx) = in%density(idx) !pressure: if (in%density(idx) > params%eps) then !velocity: tmp(1:params%dimensions) = in%U(idx,:) / in%density(idx) out%U(idx,:) = tmp(1:params%dimensions) else tmp = 0.0 out%U(idx,:) = 0.0 endif out%energy(idx) = (params%gamma - 1.0) * (in%energy(idx) - 0.5 * in%density(idx) * dot_product(tmp, tmp)) if (out%energy(idx) < params%eps) then out%energy(idx) = 0.0 endif end subroutine subroutine fthetacoef(inp) real(kind=prk) :: minus real(kind=prk) :: x1, x2, x3, y1, y2, y3 type(Parameters) :: inp minus = 0.0 !calc x1-x3,y1-y3: x1 = inp%smoothing_length / 2.0 + (inp%totalcells / 6.0) * inp%smoothing_length x3 = inp%smoothing_length / 2.0 + (inp%totalcells / 3.0) * inp%smoothing_length x2 = (x3 + x1) / 2.0 y1 = 0.0 y2 = minus y3 = 0.0 print *, x1, x2, x3 print *, y1, y2, y3 !calc coef: inp%ta = (y3 - (x3 * (y2 - y1) + x2 * y1 - x1 * y2) / (x2 - x1) ) / (x3 * (x3 - x1 - x2) + x1 * x2) inp%tb = (y2 - y1) / (x2 - x1) - inp%ta * (x1 + x2) inp%tc = (x2 * y1 - x1 * y2) / (x2 - x1) + inp%ta * x1 * x2 end subroutine end module CUDASphTvd program GPU_cSPH_TVD use CUDASphTvd use cudafor use thrust implicit none type(Parameters) :: inparams type(test), allocatable, device :: z(:) real, allocatable, device :: zz(:) type(test) :: h_z type(test), device :: d_z integer :: istat ! ! cuda events for elapsing ! type ( cudaEvent ) :: startEvent , stopEvent ! real :: time, random ! integer :: istat ! ! ! Create events ! istat = cudaEventCreate ( startEvent ) ! istat = cudaEventCreate ( stopEvent ) ! ! ! istat = cudaEventRecord ( startEvent , 0) ! call thrustsort(gpuData,size(gpuData)) ! ! istat = cudaEventRecord ( stopEvent , 0) ! istat = cudaEventSynchronize ( stopEvent ) ! istat = cudaEventElapsedTime ( time , startEvent , stopEvent ) ! ! print *," Sorted array in:",time," (ms)" inparams%smoothing_length = 0.5 inparams%dx = 0.5 inparams%courant = 0.0001 inparams%eps = 0.0000001 ! host%gamma = 1.5 inparams%gamma = 1.5 inparams%rho_0 = 1 inparams%c_0 = 1 inparams%p_0 = 1 inparams%dimensions = dimen inparams%Nx = grid inparams%Ny = abs(dimen > 1) * grid + abs(dimen /= 2) inparams%Nz = abs(dimen > 2) * grid + abs(dimen /= 3) inparams%cnst = 10.0 / 7.0 / 3.1415926535897932384626433832795 inparams%totalcells = inparams%Nx * inparams%Ny * inparams%Nz inparams%neighbourscount = 3 ** inparams%dimensions if (inparams%dimensions == 1) then inparams%dim3d = 2; inparams%dim2d = 2 inparams%Wnx = 0; inparams%Wny = 0 else if (inparams%dimensions == 2) then inparams%dim3d = 2; inparams%dim2d = 0 inparams%Wnx = 3; inparams%Wny = 0 else if (inparams%dimensions == 3) then inparams%dim3d = 0; inparams%dim2d = 0 inparams%Wnx = 3; inparams%Wny = 3 endif ! print *, "dbg!" call calculateSmoothingGradientW0(inparams) ! print *, "dbg!" call fthetacoef(inparams) print *, inparams%ta, inparams%tb, inparams%tc params = inparams allocate(allfluid, allfluidhalf, allfluidnext, allforces) allocate(allfluxes(dimen)) allocate(time_steps(inparams%totalcells)) allocate(boolflags(inparams%totalcells)) allocate(tau) print *, "START!" call cuda_run(0.01, 30.0, 0.0) print *, "DONE CUDA!" !Mach : |u|/Cs end program GPU_cSPH_TVD