Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ! coursework_2011.cuf
- !
- ! FUNCTIONS:
- ! COURSEW - Entry point of console application.
- !
- !
- !
- !****************************************************************************
- !
- ! PROGRAM: GPU_cSPH_TVD
- !
- ! PURPOSE: Use GPU computing for cSPH-TVD method
- !
- !****************************************************************************
- module thrust
- interface thrustsort
- subroutine sort_int( input,N) bind(C,name="sort_int_wrapper")
- use iso_c_binding
- integer(c_int),device:: input(*)
- integer(c_int),value:: N
- end subroutine
- subroutine sort_float( input,N) bind(C,name="sort_float_wrapper")
- use iso_c_binding
- real(c_float),device:: input(*)
- integer(c_int),value:: N
- end subroutine
- subroutine sort_double( input,N) bind(C,name="sort_double_wrapper")
- use iso_c_binding
- real(c_double),device:: input(*)
- integer(c_int),value:: N
- end subroutine
- end interface
- end module thrust
- module fluid
- use iso_c_binding
- implicit none
- integer, parameter :: dimen = 1
- integer, parameter :: grid = 512
- integer, parameter :: prk = 4
- type Fluids
- ! density = P, energy = e
- ! U = (U_x, U_y, U_z)
- ! r = (x, y, z)
- real(kind=prk) :: density(grid**dimen), energy(grid**dimen)
- real(kind=prk) :: U(grid**dimen,dimen) !U_x(:), U_y(:), U_z(:) ! speed components
- real(kind=prk) :: r(grid**dimen,dimen) !x(:), y(:), z(:)
- end type
- type test
- real, allocatable :: p(:)
- end type
- type Fluidstemp
- sequence
- real(kind=prk) :: z, energy, density
- real(kind=prk) :: U(dimen)
- real(kind=prk) :: r(dimen)
- end type
- type Parameters
- real(kind=prk) dx ! center of cells = dx/2 + i*dx
- integer Nx, Ny, Nz ! dimension number of cells, example: Nx = 10, Ny = 15, Nz = 20 -> [10x15x20]
- integer N !total number of cells: [10x15x20] = 3000
- real(kind=prk) monaghan_multiplier, smoothing_length
- real(kind=prk) gamma, rho_0, c_0, p_0, cnst, eps, courant
- real(kind=prk) :: nablaW0(3 ** dimen, dimen)
- real(kind=prk) :: ta, tb, tc, minus
- integer totalcells, neighbourscount
- integer dimensions, dim2d, dim3d !1D, 2D or 3D
- integer WNx, WNy
- end type
- end module fluid
- module CUDASphTvd
- use cudafor
- use cudadevice
- use thrust
- use fluid
- integer, parameter :: BLOCK_SIZE=16
- type(Fluids), allocatable, device :: allfluid, allfluidnext, allfluidhalf, allforces !allfluidhalf = (allfluid+allfluidnext) / 2
- type(Fluids), allocatable, device :: allfluxes(:)
- real(kind=prk), allocatable, device :: time_steps(:)
- logical, allocatable, device :: boolflags(:)
- type(Parameters), constant :: params
- real(kind=prk), allocatable, device :: tau
- real(kind=prk), allocatable, device :: tautemp(:)
- contains
- attributes(global) subroutine cuda_calculate_forces_sph_predictor(in, outforce)
- type(Fluids), device :: in, outforce
- integer :: i, j, k, idx, idx_k
- !temporary variables for speed-up:
- real(kind=prk) :: H_i, H_k, sqrU_i
- !temporary 'fluid' variables:
- real(kind=prk) :: i_density, i_energy, theta_k, tmp
- real(kind=prk) :: Ui(dimen), i_impulse(dimen)
- real(kind=prk) :: Uk(dimen), tmpW(dimen)
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- sqrU_i = 0.0
- !calculate Fi:
- !get fluid_i characteristics (save in temp):
- if (in%density(idx) > params%eps) then
- Ui(1:params%dimensions) = in%U(idx,:) / in%density(idx)
- else
- Ui = 0.0
- endif
- i_density = in%density(idx)
- i_energy = in%energy(idx)
- i_impulse = 0.0
- !calculate H_i
- sqrU_i = dot_product(Ui, Ui)
- tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
- if (tmp > params%eps) then
- H_i = sqrt(tmp)
- else
- H_i = 0.0
- endif
- !print *, "H_i", tmp
- !calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
- H_k = 0.0
- theta_k = 0.0
- i_energy= 0.0
- !sum_{k} {Ui +Uk} = sum_{k} {Ui} + sum_{k} {Uk} = k * Ui + sum_{k} {Uk} :
- do i_k = i - 1, i + 1
- if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
- do j_k = j - 1, j + 1
- if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
- do k_k = k - 1, k + 1
- if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
- idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
- if (in%density(idx_k) > params%eps) then
- Uk(1:params%dimensions) = in%U(idx_k,:) / in%density(idx_k)
- else
- Uk = 0.0
- endif
- sqrU_i = dot_product(Uk, Uk)
- tmp = 2.0 * ( (params%gamma - 1.0) * (in%energy(idx_k) - (in%density(idx_k) * sqrU_i) / 2.0))
- if (tmp > params%eps) then
- H_k = sqrt(tmp)
- else
- H_k = 0.0
- endif
- tmpW(1:params%dimensions) = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
- if (H_i > params%eps) then
- i_impulse = i_impulse - tmpW * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
- i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0 - ftheta_k(idx_k) * dot_product(in%U(idx,:), tmpW) / H_i
- else
- i_impulse = i_impulse - tmpW * H_k
- i_energy = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0
- endif
- endif
- enddo
- endif
- enddo
- endif
- enddo
- !calculate predictor forces:
- i_impulse = H_i * i_impulse
- i_energy = H_i * i_energy
- outforce%density(idx) = 0.0
- if (i_density > params%eps) then
- !print *, "i_impulse, i_energy", i_impulse, i_energy
- outforce%U(idx,:) = i_impulse(1:params%dimensions)
- outforce%energy(idx) = i_energy
- else
- outforce%U(idx,:) = 0.0
- outforce%energy(idx) = 0.0
- endif
- end subroutine
- attributes(device) function ftheta_k(indx)
- real(kind=prk) :: ftheta_k, coord
- integer :: indx
- coord = params%smoothing_length / 2.0 + real(indx, kind=prk) * params%smoothing_length
- if (indx < params%totalcells/3 .AND. indx > params%totalcells/6) then
- ! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
- ftheta_k = params%ta * coord ** 2 + params%tb * coord + params%tc
- else
- ftheta_k = 0.0
- endif
- ftheta_k = 0.0
- end function ftheta_k
- attributes(global) subroutine cuda_calculate_dt(in, outdt)
- type(Fluids), device :: in
- real(kind=prk), dimension(:), device :: outdt
- real(kind=prk) :: Cs
- integer :: idx, i, j, k
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- outdt(idx) = 10000000.0
- if (in%density(idx) > params%eps) then
- Cs = params%gamma * (params%gamma - 1.0) * (in%energy(idx) - in%density(idx)*dot_product(in%U(idx,:) / in%density(idx),in%U(idx,:) / in%density(idx)) / 2.0) / in%density(idx)
- Cs = (abs(Cs > params%eps) * sqrt(abs(Cs)))
- outdt(idx) = params%courant * min(params%smoothing_length / (2.0 * maxval(abs(in%U(idx,:) / in%density(idx)))), params%smoothing_length / (maxval(abs(in%U(idx,:) / in%density(idx))) + Cs))
- endif
- end subroutine cuda_calculate_dt
- attributes(global) subroutine cuda_calculate_predictor_sph(in, inforce, outhalf, tau)
- integer idx, i, j, k
- !temporary 'fluid' variables:
- real(kind=prk), device :: tau
- type(Fluids), device :: in, inforce, outhalf
- !formula 16:
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- !calculate H_i:
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- !calculate predictor based on calculated forces inb4:
- outhalf%density(idx) = in%density(idx) + (tau / 2.0) * inforce%density(idx)
- if (in%density(idx) > params%eps) then
- outhalf%U(idx,:) = in%U(idx,:) + tau * inforce%U(idx,:) / 2.0
- outhalf%energy(idx) = in%energy(idx) + tau * inforce%energy(idx) / 2.0
- outhalf%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / in%density(idx) + outhalf%U(idx,:) / outhalf%density(idx) ) / 4.0
- else
- outhalf%U(idx,:) = 0.0
- outhalf%energy(idx) = 0.0
- outhalf%r(idx,:) = in%r(idx,:)
- endif
- end subroutine cuda_calculate_predictor_sph
- attributes(global) subroutine cuda_calculate_forces_sph_corrector(in, inhalf, outnext)
- integer idx, idx_k, i, j, k, i_k, j_k, k_k
- !temporary variables for speed-up:
- real(kind=prk) H_i, H_k, sqrU_i
- !temporary 'fluid' variables:
- real(kind=prk) i_density, i_energy, theta_k, tmp
- real(kind=prk) :: Ui(dimen), ri(dimen), i_impulse(dimen)
- real(kind=prk) :: Uk(dimen), rk(dimen), Wr(dimen), tmpW(dimen)
- type(Fluids), device :: in, inhalf, outnext
- !formula 16:
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- !calculate H_i:
- sqrU_i = 0.0
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- !calculate Fi:
- !get fluid_i characteristics (save in temp):
- if (inhalf%density(idx) > params%eps) then
- Ui(1:params%dimensions) = inhalf%U(idx,:) / inhalf%density(idx)
- else
- Ui = 0.0
- endif
- ri(1:params%dimensions) = inhalf%r(idx,:)
- i_density = inhalf%density(idx)
- i_energy = inhalf%energy(idx)
- i_impulse = 0.0
- !calculate H_i
- sqrU_i = dot_product(Ui, Ui)
- tmp = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
- if (tmp > params%eps) then
- H_i = sqrt(tmp)
- else
- H_i = 0.0
- endif
- !calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
- H_k = 0.0
- theta_k = 0.0
- i_energy= 0.0
- do i_k = i - 1, i + 1
- if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
- do j_k = j - 1, j + 1
- if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
- do k_k = k - 1, k + 1
- if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
- idx_k = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
- if (inhalf%density(idx_k) > params%eps) then
- Uk(1:params%dimensions) = inhalf%U(idx_k,:) / inhalf%density(idx_k)
- else
- Uk = 0.0
- endif
- sqrU_i = dot_product(Uk, Uk)
- tmp = 2.0 * ( (params%gamma - 1.0) * (inhalf%energy(idx_k) - (inhalf%density(idx_k) * sqrU_i) / 2.0) )
- if (tmp > params%eps) then
- H_k = sqrt(tmp)
- else
- H_k = 0.0
- endif
- rk(1:params%dimensions) = inhalf%r(idx_k,:)
- call calculateSmoothingGradientW(ri(1:params%dimensions), rk(1:params%dimensions), Wr(1:params%dimensions))
- tmpW = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
- if (H_i > params%eps) then
- i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
- i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0 - ftheta_k(idx_k) * dot_product(inhalf%U(idx,:), tmpW) / H_i
- else
- i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k
- i_energy = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0
- endif
- endif
- enddo
- endif
- enddo
- endif
- enddo
- !calculate corrector forces:
- i_impulse(1:params%dimensions) = H_i * i_impulse(1:params%dimensions)
- i_energy = H_i * i_energy
- ! call syncthreads()
- outnext%density(idx) = in%density(idx) + 0.0 ! + 0.0 * (tau / 2.0)
- if (i_density > params%eps) then
- outnext%U(idx,:) = in%U(idx,:) + tau * i_impulse(1:params%dimensions)
- outnext%energy(idx) = in%energy(idx) + tau * i_energy
- outnext%r(idx,:) = in%r(idx,:) + tau * ( in%U(idx,:) / i_density + outnext%U(idx,:) / i_density ) / 2.0
- else
- outnext%U(idx,:) = 0.0
- outnext%energy(idx) = 0.0
- outnext%r(idx,:) = in%r(idx,:)
- endif
- !calculate q(n+1/2):
- ! inhalf%density(idx) = ( in%density(idx) + outnext%density(idx) ) / 2.0
- ! inhalf%energy(idx) = ( in%energy(idx) + outnext%energy(idx) ) / 2.0
- ! inhalf%r(idx,:) = ( in%r(idx,:) + outnext%r(idx,:) ) / 2.0
- ! inhalf%U(idx,:) = ( in%U(idx,:) + outnext%U(idx,:) ) / 2.0
- end subroutine cuda_calculate_forces_sph_corrector
- attributes(global) subroutine cuda_calculate_fluidhalf(in, innext, outhalf)
- type(Fluids), device :: in, outhalf, innext
- integer :: i, j, k, idx
- !calculate q(n+1/2):
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- outhalf%density(idx) = ( in%density(idx) + innext%density(idx) ) / 2.0
- outhalf%energy(idx) = ( in%energy(idx) + innext%energy(idx) ) / 2.0
- outhalf%r(idx,:) = ( in%r(idx,:) + innext%r(idx,:) ) / 2.0
- outhalf%U(idx,:) = ( in%U(idx,:) + innext%U(idx,:) ) / 2.0
- end subroutine cuda_calculate_fluidhalf
- attributes(global) subroutine cuda_calculate_flux_tvd(in, inhalf, outq0, outfluxes, outflags)
- integer idx, idx_k, i, j, k, i_k, j_k, k_k, dims, dim2dim, dim3dim
- integer, dimension(:) :: offset(3), ijk(3)
- logical, dimension(:), device :: outflags
- type(Fluids), device :: in, inhalf, outq0 !in = n+1 from corrector !outq0 = n
- type(Fluids), device :: outfluxes(dimen)
- type(Fluidstemp) :: neighbrs(3 + (dimen - 1) * 2), fluidhalf(4), ql, qr, Fl, Fr
- real(kind=prk) :: dr1, dr2, dr3, L, pl, Cl
- real(kind=prk) :: Vrl(dimen), Vrr(dimen)
- real(kind=prk) :: aa
- ! print *, "calculate_flux_tvd"
- ! neighbours = 3 + (params%dimensions - 1) * 2
- ! print *, "calculate_flux_tvd debug"
- neighbrs%density = 0.0
- neighbrs%energy = 0.0
- fluidhalf%density = 0.0
- fluidhalf%energy = 0.0
- do i = 1, params%dimensions
- neighbrs(:)%r(i) = 0.0
- neighbrs(:)%U(i) = 0.0
- fluidhalf(:)%r(i) = 0.0
- fluidhalf(:)%U(i) = 0.0
- enddo
- offset(1) = 1
- offset(2) = params%Nx
- offset(3) = params%Nx * params%Ny
- dim2dim = abs(params%dimensions == 1) * 1
- dim3dim = abs(params%dimensions < 3) * 1
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- do dims = 1, params%dimensions
- outfluxes(dims)%density(idx) = 0.0
- outfluxes(dims)%energy(idx) = 0.0
- outfluxes(dims)%U(idx,:) = 0.0
- enddo
- !for each particle do:
- if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-2 + dim3dim) ) then
- if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-2 + dim2dim) ) then
- if ( (k >= 2) .AND. (k <= params%Nx-2) ) then
- ijk(1) = k; ijk(2) = j; ijk(3) = i;
- !get center cell (we will calculate fluxes relatively this cell):
- ! print *, "idx, ", idx
- ! print *, "ijk, ", ijk
- !take all neighbours (1d: 3, 2d: 5(cross), 3d: 7(3d-cross)):
- if ( (k - 1 >= 0) .AND. (k - 1 < params%Nx) ) then
- neighbrs(1)%density = in%density(idx-1)
- neighbrs(1)%energy = in%energy(idx-1)
- neighbrs(1)%r(:) = in%r(idx-1,:)
- neighbrs(1)%U(:) = in%U(idx-1,:)
- endif
- neighbrs(2)%density = in%density(idx)
- neighbrs(2)%energy = in%energy(idx)
- neighbrs(2)%r(:) = in%r(idx,:)
- neighbrs(2)%U(:) = in%U(idx,:)
- if ( (k + 1 >= 0) .AND. (k + 1 < params%Nx) ) then
- neighbrs(3)%density = in%density(idx+1)
- neighbrs(3)%energy = in%energy(idx+1)
- neighbrs(3)%r(:) = in%r(idx+1,:)
- neighbrs(3)%U(:) = in%U(idx+1,:)
- endif
- !add neighbours if dim > 1:
- if ( params%dimensions > 1 ) then
- if ( (j - 1 >= 0) .AND. (j - 1 < params%Ny) ) then
- idx_k = idx - params%Nx !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
- neighbrs(4)%density = in%density(idx_k)
- neighbrs(4)%energy = in%energy(idx_k)
- neighbrs(4)%r(:) = in%r(idx_k,:)
- neighbrs(4)%U(:) = in%U(idx_k,:)
- endif
- if ( (j + 1 >= 0) .AND. (j + 1 < params%Ny) ) then
- idx_k = idx + params%Nx
- neighbrs(5)%density = in%density(idx_k)
- neighbrs(5)%energy = in%energy(idx_k)
- neighbrs(5)%r(:) = in%r(idx_k,:)
- neighbrs(5)%U(:) = in%U(idx_k,:)
- endif
- endif
- !add neighbours if dim > 2:
- if ( params%dimensions > 2 ) then
- if ( (i - 1 >= 0) .AND. (i - 1 < params%Nz) ) then
- idx_k = idx - params%Nx * params%Ny !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
- neighbrs(6)%density = in%density(idx_k)
- neighbrs(6)%energy = in%energy(idx_k)
- neighbrs(6)%r(:) = in%r(idx_k,:)
- neighbrs(6)%U(:) = in%U(idx_k,:)
- endif
- if ( (i + 1 >= 0) .AND. (i + 1 < params%Nz) ) then
- idx_k = idx + params%Nx * params%Ny
- neighbrs(7)%density = in%density(idx_k)
- neighbrs(7)%energy = in%energy(idx_k)
- neighbrs(7)%r(:) = in%r(idx_k,:)
- neighbrs(7)%U(:) = in%U(idx_k,:)
- endif
- endif
- !Okay, now lets decide if we need to calculate fluxes (check density):
- !take components based on current dimension:
- if (maxval(neighbrs%density) > params%eps) then
- outflags(idx) = .TRUE.
- do dims = 1, params%dimensions
- do i_k = 0, 3
- fluidhalf(i_k+1)%density = inhalf%density(idx + offset(dims) * (i_k - 2))
- fluidhalf(i_k+1)%energy = inhalf%energy(idx + offset(dims) * (i_k - 2))
- fluidhalf(i_k+1)%U(:) = inhalf%U(idx + offset(dims) * (i_k - 2),:)
- fluidhalf(i_k+1)%r(:) = inhalf%r(idx + offset(dims) * (i_k - 2),:)
- enddo
- !calc ql:
- dr1 = fluidhalf(2)%r(dims) - fluidhalf(1)%r(dims)
- dr2 = fluidhalf(3)%r(dims) - fluidhalf(2)%r(dims)
- dr3 = fluidhalf(4)%r(dims) - fluidhalf(3)%r(dims)
- if (fluidhalf(2)%density > params%eps) then
- drx = (params%smoothing_length / 2.0 - (fluidhalf(2)%r(dims) - (params%dx/2 + (ijk(dims) - 1) * params%dx)) / 2.0)
- call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(2)%density - fluidhalf(1)%density) / dr1, L)
- ql%density = fluidhalf(2)%density + drx * L
- call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(2)%energy - fluidhalf(1)%energy) / dr1, L)
- ql%energy = fluidhalf(2)%energy + drx * L
- do j_k = 1, params%dimensions
- call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(2)%U(j_k) - fluidhalf(1)%U(j_k)) / dr1, L)
- ql%U(j_k) = fluidhalf(2)%U(j_k) + drx * L
- enddo
- ! Vrl = ql%U(1,:) !Vrl(1) = Vxl, Vrl(2) = Vyl, Vrl(3) = Vzl
- if (ql%density > params%eps) then
- Vrl = ql%U(:) / ql%density
- pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
- Cl = sqrt(params%gamma * pl / ql%density)
- else
- Vrl = 0.0
- pl = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
- Cl = 0.0
- endif
- Fl%density = ql%U(dims)
- Fl%U(:) = ql%U(:) * Vrl(dims)
- Fl%energy = ql%energy * Vrl(dims)
- else
- ql%density = 0.0
- ql%energy = 0.0
- ql%U(:) = 0.0
- Fl%density = 0.0
- Fl%energy = 0.0
- Fl%U(:) = 0.0
- Cl = 0.0
- Vrl = 0.0
- endif
- ! print *, "t_debug4"
- !calc qr:
- if (fluidhalf(3)%density > params%eps) then
- drx = (params%smoothing_length / 2.0 + (fluidhalf(3)%r(dims) - (params%dx / 2.0 + ijk(dims) * params%dx)) / 2.0)
- call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(4)%density - fluidhalf(3)%density) / dr3, L)
- qr%density = fluidhalf(3)%density - drx * L
- call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(4)%energy - fluidhalf(3)%energy) / dr3, L)
- qr%energy = fluidhalf(3)%energy - drx * L
- do j_k = 1, params%dimensions
- call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(4)%U(j_k) - fluidhalf(3)%U(j_k)) / dr3, L)
- qr%U(j_k) = fluidhalf(3)%U(j_k) - drx * L
- enddo
- ! Vrr = qr%U(1,:) !Vrr(1) = Vxr, Vrr(2) = Vyr, Vrr(3) = Vzr
- if (qr%density > params%eps) then
- Vrr = qr%U(:) / qr%density
- pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
- Cr = sqrt(params%gamma * pr / qr%density)
- else
- Cr = 0.0
- Vrr = 0.0
- pr = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
- endif
- Fr%density = qr%U(dims)
- Fr%U(:) = qr%U(:) * Vrr(dims)
- Fr%energy = qr%energy * Vrr(dims)
- else
- qr%density = 0.0
- qr%energy = 0.0
- qr%U(:) = 0.0
- Fr%density = 0.0
- Fr%energy = 0.0
- Fr%U(:) = 0.0
- Cr = 0.0
- Vrr = 0.0
- endif
- !calc Fx:
- if ((Cl > params%eps) .OR. (Cr > params%eps)) then
- Ll = min(Vrl(dims) - Cl, Vrr(dims) - Cr)
- Lr = max(Vrr(dims) + Cr, Vrl(dims) + Cl)
- bb = max(abs(Ll), abs(Lr))
- outfluxes(dims)%density(idx) = ( Fr%density + Fl%density + bb * (ql%density - qr%density) ) / 2.0
- outfluxes(dims)%energy(idx) = ( Fr%energy + Fl%energy + bb * (ql%energy - qr%energy) ) / 2.0
- outfluxes(dims)%U(idx,:) = ( Fr%U(:) + Fl%U(:) + bb * (ql%U(:) - qr%U(:)) ) / 2.0
- else
- outfluxes(dims)%density(idx) = 0.0
- outfluxes(dims)%energy(idx) = 0.0
- outfluxes(dims)%U(idx,:) = 0.0
- endif
- enddo
- else
- outflags(idx) = .FALSE.
- do dims = 1, params%dimensions
- outfluxes(dims)%density(idx) = 0.0
- outfluxes(dims)%energy(idx) = 0.0
- outfluxes(dims)%U(idx,:) = 0.0
- enddo
- endif
- ! outfluxes(:)%density(idx) = 0.0
- endif
- endif
- endif
- end subroutine cuda_calculate_flux_tvd
- attributes(global) subroutine cuda_finalize(outq0, in, influxes, inflags)
- type(Fluids), device :: in, outq0 !in = n+1 from corrector !outq0 = n
- type(Fluids), device :: influxes(dimen)
- type(Fluidstemp) :: fluidhalf(dimen)
- integer, dimension(:) :: offset(3)
- logical, dimension(:), device :: inflags
- integer :: dims, i, j, k, idx
- offset(1) = 1
- offset(2) = params%Nx
- offset(3) = params%Nx * params%Ny
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-3 + params%dim3d) ) then
- if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-3 + params%dim2d) ) then
- if ( (k >= 2) .AND. (k <= params%Nx-3) ) then
- if (inflags(idx)) then
- !calculate all neighbour fluxes (idx + offset(dims)):
- do dims = 1, params%dimensions
- fluidhalf(dims)%density = influxes(dims)%density(idx + offset(dims))
- fluidhalf(dims)%energy = influxes(dims)%energy(idx + offset(dims))
- fluidhalf(dims)%U(:) = influxes(dims)%U(idx + offset(dims),:)
- enddo
- !calculate final values:
- outq0%density(idx) = in%density(idx) + tau / params%smoothing_length * (sum(influxes(:)%density(idx)) - sum(fluidhalf(1:params%dimensions)%density))
- outq0%energy(idx) = in%energy(idx) + tau / params%smoothing_length * (sum(influxes(:)%energy(idx)) - sum(fluidhalf(1:params%dimensions)%energy))
- do dims = 1, params%dimensions
- outq0%U(idx,dims) = in%U(idx,dims) + tau / params%smoothing_length * (sum(influxes(:)%U(idx,dims)) - sum(fluidhalf(1:params%dimensions)%U(dims)))
- enddo
- else
- outq0%density(idx) = in%density(idx)
- outq0%energy(idx) = in%energy(idx)
- outq0%U(idx,:) = in%U(idx,:)
- endif
- endif
- endif
- endif
- end subroutine cuda_finalize
- attributes(device) subroutine minmod(a, b, out)
- real(kind=prk), device :: a, b, out
- out = ( 1.0 / 2.0) * (sign(1.0, a) + sign(1.0, b)) * min(abs(a), abs(b))
- end subroutine minmod
- subroutine cuda_setupSystem(inparams)
- end subroutine
- attributes(global) subroutine cuda_boundary_conditions(ptr)
- integer j, k, idx, idx_j, idx_k
- type(Fluids), device :: ptr
- integer ti, tj, tk, tidx
- ti = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- tj = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- tk = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- ! idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- if (params%dimensions == 1) then
- ptr%density(1) = ptr%density(3)
- ptr%density(2) = ptr%density(1)
- ptr%density(params%Nx) = ptr%density(params%Nx - 2)
- ptr%density(params%Nx-1) = ptr%density(params%Nx)
- ptr%energy(1) = ptr%energy(3)
- ptr%energy(2) = ptr%energy(1)
- ptr%energy(params%Nx) = ptr%energy(params%Nx - 2)
- ptr%energy(params%Nx-1) = ptr%energy(params%Nx)
- ptr%U(1,1) = ptr%U(3,1)
- ptr%U(2,1) = ptr%U(1,1)
- ptr%U(params%Nx,1) = ptr%U(params%Nx - 2,1)
- ptr%U(params%Nx-1,1) = ptr%U(params%Nx,1)
- endif
- if (params%dimensions == 2) then
- ! copy x:
- if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
- ! do k = 0, params%Nx-1
- idx = k + 1
- idx_j = params%Nx + idx
- idx_k = 2 * params%Nx + idx
- ptr%density(idx) = ptr%density(idx_k)
- ptr%density(idx_j) = ptr%density(idx)
- ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
- ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
- ptr%energy(idx) = ptr%energy(idx_k)
- ptr%energy(idx_j) = ptr%energy(idx)
- ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
- ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
- ptr%U(idx,1) = ptr%U(idx_k,1)
- ptr%U(idx_j,1) = ptr%U(idx,1)
- ptr%U(idx,2) = ptr%U(idx_k,2)
- ptr%U(idx_j,2) = ptr%U(idx,2)
- ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
- ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
- ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
- ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
- endif
- ! copy y:
- if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
- ! do k = 0, params%Ny-1
- idx = params%Nx * j + 1
- idx_j = idx + 1
- idx_k = idx + 2
- ptr%density(idx) = ptr%density(idx_k)
- ptr%density(idx_j) = ptr%density(idx)
- ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
- ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
- ptr%energy(idx) = ptr%energy(idx_k)
- ptr%energy(idx_j) = ptr%energy(idx)
- ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
- ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
- ptr%U(idx,1) = ptr%U(idx_k,1)
- ptr%U(idx_j,1) = ptr%U(idx,1)
- ptr%U(idx,2) = ptr%U(idx_k,2)
- ptr%U(idx_j,2) = ptr%U(idx,2)
- ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
- ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
- ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
- ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
- endif
- endif
- if (params%dimensions == 3) then
- !copy (for [y x z])
- if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
- if ( (k >= 0) .AND. (k <= params%Ny-1) ) then
- idx = params%Nx * params%Ny * j + params%Nx * k + 1
- idx_j = idx + 1
- idx_k = idx + 2
- ptr%density(idx) = ptr%density(idx_k)
- ptr%density(idx_j) = ptr%density(idx)
- ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
- ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
- ptr%energy(idx) = ptr%energy(idx_k)
- ptr%energy(idx_j) = ptr%energy(idx)
- ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
- ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
- ptr%U(idx,1) = ptr%U(idx_k,1)
- ptr%U(idx_j,1) = ptr%U(idx,1)
- ptr%U(idx,2) = ptr%U(idx_k,2)
- ptr%U(idx_j,2) = ptr%U(idx,2)
- ptr%U(idx,3) = ptr%U(idx_k,3)
- ptr%U(idx_j,3) = ptr%U(idx,3)
- ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
- ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
- ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
- ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
- ptr%U(params%Nx + idx - 1,3) = ptr%U(params%Nx + idx - 3,3)
- ptr%U(params%Nx + idx - 2,3) = ptr%U(params%Nx + idx - 1,3)
- endif
- endif
- !copy (for [x x z])
- if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
- if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
- idx = params%Nx * params%Ny * j + k + 1
- idx_j = idx + params%Nx
- idx_k = idx + 2 * params%Nx
- ptr%density(idx) = ptr%density(idx_k)
- ptr%density(idx_j) = ptr%density(idx)
- ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
- ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
- ptr%energy(idx) = ptr%energy(idx_k)
- ptr%energy(idx_j) = ptr%energy(idx)
- ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
- ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
- ptr%U(idx,1) = ptr%U(idx_k,1)
- ptr%U(idx_j,1) = ptr%U(idx,1)
- ptr%U(idx,2) = ptr%U(idx_k,2)
- ptr%U(idx_j,2) = ptr%U(idx,2)
- ptr%U(idx,3) = ptr%U(idx_k,3)
- ptr%U(idx_j,3) = ptr%U(idx,3)
- ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
- ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
- ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
- ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
- ptr%U((params%Ny - 1) * params%Nx + idx,3) = ptr%U((params%Ny - 3) * params%Nx + idx,3)
- ptr%U((params%Ny - 2) * params%Nx + idx,3) = ptr%U((params%Ny - 1) * params%Nx + idx,3)
- endif
- endif
- !copy (for [x x y])
- if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
- if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
- idx = params%Nx * j + k + 1
- idx_j = idx + params%Nx * params%Ny
- idx_k = idx + params%Nx * params%Ny * 2
- ptr%density(idx) = ptr%density(idx_k)
- ptr%density(idx_j) = ptr%density(idx)
- ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 3) + idx)
- ptr%density(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx)
- ptr%energy(idx) = ptr%energy(idx_k)
- ptr%energy(idx_j) = ptr%energy(idx)
- ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 3) + idx)
- ptr%energy(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx)
- ptr%U(idx,1) = ptr%U(idx_k,1)
- ptr%U(idx_j,1) = ptr%U(idx,1)
- ptr%U(idx,2) = ptr%U(idx_k,2)
- ptr%U(idx_j,2) = ptr%U(idx,2)
- ptr%U(idx,3) = ptr%U(idx_k,3)
- ptr%U(idx_j,3) = ptr%U(idx,3)
- ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,1)
- ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1)
- ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,2)
- ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2)
- ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,3)
- ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3)
- endif
- endif
- endif
- end subroutine
- attributes(global) subroutine initial_conditions(allf)
- type(Fluids), device :: allf
- integer :: i, j, k, dims, ijk(3), N
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- N = params%Nx
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- ijk(1) = k
- ijk(2) = j
- ijk(3) = i
- ! print *, "idx", idx
- ! print *, "ijk", i, j, k
- if (idx > (params%totalcells * 0.51) .AND. idx < (params%totalcells * 0.61)) then ! wave
- allfluid%density(idx) = 11.3
- allfluid%U(idx,:) = -22.2
- allfluid%energy(idx) = 45.0
- ! if (idx < (params%totalcells * 0.4)) then
- else if (idx > (params%totalcells * 0.39) .AND. idx < (params%totalcells * 0.49)) then ! wave
- allfluid%density(idx) = 11.3
- allfluid%U(idx,:) = 22.2
- allfluid%energy(idx) = 45.0
- else
- allfluid%density(idx) = 11.3
- allfluid%U(idx,:) = 0.0
- allfluid%energy(idx) = 0.0
- endif
- ! else
- ! allfluid%density(idx) = 20.0
- ! allfluid%U(idx,:) = 0.0
- ! allfluid%energy(idx) = 0.0
- ! endif
- do dims = 1, params%dimensions
- allf%r(idx,dims) = params%dx / 2.0 + ijk(dims) * params%dx
- enddo
- end subroutine
- subroutine calculateSmoothingGradientW0(inp)
- type(Parameters) :: inp
- real(kind=prk) dW, s
- real(kind=prk) dr_ik(inp%dimensions)
- integer i, j, k, i_max, j_max, k_max, dims, ijk(inp%dimensions)
- if ( inp%dimensions == 3 ) then
- i_max = 3; j_max = 3; k_max = 3;
- else if ( inp%dimensions == 2 ) then
- i_max = 1; j_max = 3; k_max = 3;
- else
- i_max = 1; j_max = 1; k_max = 3;
- endif
- do i = 0, i_max-1
- do j = 0, j_max-1
- do k = 0, k_max-1
- if (inp%dimensions == 1) then
- ijk(1) = k;
- else if (inp%dimensions == 2) then
- ijk(1) = j; ijk(2) = k;
- else
- ijk(1) = i; ijk(2) = j; ijk(3) = k;
- endif
- do dims = 1, inp%dimensions
- dr_ik(dims) = inp%smoothing_length*(1-ijk(dims))
- enddo
- s = dot_product(dr_ik, dr_ik)
- s = sqrt(s);
- dW = gradW(s, inp);
- inp%nablaW0(inp%WNx * inp%WNx * i + inp%WNx * j + k + 1, :) = dW * dr_ik
- enddo
- enddo
- enddo
- end subroutine
- function gradW(s, inp)
- type(Parameters), intent(in) :: inp
- real(kind=prk) :: s, ss, dW, gradW
- ss = s / inp%smoothing_length
- if ((ss > 0) .AND. (ss < 1.0)) then
- dW = (-3.0 + 2.25 * ss ) * ss
- else if((ss < 2.0) .AND. (ss >= 1.0)) then
- dW = (2.0 - ss); dW = -0.75 * dW * dW
- else
- dW=0.0;
- endif
- if(s > inp%eps) then
- gradW = inp%cnst * dW / s / inp%smoothing_length
- else
- gradW = 0.0
- endif
- end function gradW
- attributes(device) subroutine calculateSmoothingGradientW(r_i, r_k, W)
- real(kind=prk) q, s
- real(kind=prk), device :: r_i(dimen), r_k(dimen), W(dimen)
- s = sqrt(SUM((r_i - r_k) ** 2))
- q = s / params%smoothing_length
- if (s > params%eps) then
- !if s (0;1]
- if ( (q > 0) .AND. (q <= 1.0) ) then
- W = (r_i - r_k) * params%cnst * ( -3.0 + 2.25 * q ) * q / s / params%smoothing_length
- ! W = params%cnst * (r_k - r_i) * (4.0 * params%smoothing_length - 3.0 * s)
- !if s (1;2] :
- else if ( (q > 1.0) .AND. (q <= 2.0) ) then
- W = (r_i - r_k) * params%cnst * ( -0.75 * (2.0 - q) ** 2 ) / s / params%smoothing_length
- ! W = params%cnst * (r_k - r_i) * ( (s - 2.0 * params%smoothing_length) ** 2 ) / s
- else
- W = 0.0
- endif
- else
- W = 0.0
- endif
- end subroutine
- subroutine cuda_run(t_save, t_end, t_step)
- real(kind=prk) :: t_save, t_end, t_step
- real(kind=prk) :: total_time, current_time, end_time, save_time
- real(kind=prk) :: iter, h_tau
- type(dim3) :: blocks, threads
- character(len=70) :: fn, tmp
- type(Fluids) :: h_all
- type(Fluidstemp), device :: minz, maxz
- type(Fluidstemp) :: h_minz, h_maxz
- type(Parameters) :: outp
- type ( cudaEvent ) :: startEvent , stopEvent
- real :: time, random, tmpa
- integer :: istat
- integer, parameter :: outunit=44
- integer :: filenum, i
- logical :: flag
- outp = params
- if (outp%dimensions == 1) then
- blocks = dim3(outp%Nx / BLOCK_SIZE, 1, 1)
- threads = dim3(BLOCK_SIZE, 1, 1)
- else if (outp%dimensions == 2) then
- blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, 1)
- threads = dim3(BLOCK_SIZE, BLOCK_SIZE, 1)
- else
- blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, outp%Nz / BLOCK_SIZE)
- threads = dim3(BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE)
- endif
- !print *, "run!"
- flag = .FALSE.
- filenum = 1
- current_time = 0.0
- save_time = 0.0
- iter = 1.0
- end_time = t_end
- time_steps = 1000000.0
- call initial_conditions<<<blocks, threads>>>(allfluid)
- !FTheta plot:
- h_all = allfluid
- ! build filename -- i.grd
- write(fn,fmt='(a)') 'ftheta.txt'
- ! open it with a fixed unit number
- open(unit=outunit,file=fn, form='formatted')
- ! write something
- do i = 1, outp%totalcells
- write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), host_ftheta_k(i, outp)
- enddo
- ! close it
- close(outunit)
- !Calculate:
- do while (current_time < end_time)
- write(tmp,fmt='(a,i0,a)') '(',outp%Nx, 'F)'
- ! do while (iter < 6)
- call cuda_boundary_conditions<<<blocks, threads>>>(allfluid)
- call cuda_calculate_dt<<<blocks, threads>>>(allfluid, time_steps)
- call thrustsort(time_steps,size(time_steps))
- istat = cudaMemcpy(tau, time_steps(1), 1, cudaMemcpyDeviceToDevice)
- h_tau = tau
- call cuda_calculate_forces_sph_predictor<<<blocks, threads>>>(allfluid, allforces)
- call cuda_calculate_predictor_sph<<<blocks, threads>>>(allfluid, allforces, allfluidhalf, tau)
- call cuda_boundary_conditions<<<blocks, threads>>>(allfluidhalf)
- call cuda_calculate_forces_sph_corrector<<<blocks, threads>>>(allfluid, allfluidhalf, allfluidnext)
- call cuda_boundary_conditions<<<blocks, threads>>>(allfluidnext)
- call cuda_calculate_fluidhalf<<<blocks, threads>>>(allfluid, allfluidnext, allfluidhalf)
- call cuda_calculate_flux_tvd<<<blocks, threads>>>(allfluidnext, allfluidhalf, allfluid, allfluxes, boolflags)
- ! h_all = allfluxes(1)
- ! print *, "Debug: density sum:", sum(abs(h_all%density))
- ! print *, "Debug: energy sum:", sum(abs(h_all%energy))
- ! print *, "Debug: impulse sum:", sum(abs(h_all%U))
- call cuda_finalize<<<blocks, threads>>>(allfluid, allfluidnext, allfluxes, boolflags)
- if (current_time >= save_time) then
- call integral_values<<<blocks,threads>>>(allfluidnext, allfluid)
- istat = cudaThreadSynchronize()
- h_tau = tau
- h_all = allfluidnext
- print *, "density sum:", sum(abs(h_all%density))
- print *, "energy sum:", sum(abs(h_all%energy))
- print *, "impulse sum:", sum(abs(h_all%U))
- print *, "current time:", current_time
- print *, "save time:", save_time
- print *, "dt:", h_tau
- print *, "iteration:", iter
- random = 0.0
- do i = 1, outp%totalcells
- if (h_all%density(i) > outp%eps) then
- tmpa = outp%gamma * h_all%energy(i) / h_all%density(i)
- if (tmpa > outp%eps) then
- tmpa = sqrt(tmpa)
- tmpa = sqrt(dot_product(h_all%U(i,:),h_all%U(i,:))) / tmpa
- endif
- if (tmpa > random) then
- random = tmpa
- endif
- endif
- enddo
- print *, 'Mach: ', random
- ! call minmax<<<1,1>>>(allfluidnext, minz, maxz)
- istat = cudaMemcpy(time_steps, allfluidnext%density, grid**dimen, cudaMemcpyDeviceToDevice)
- call thrustsort(time_steps,size(time_steps))
- h_minz%density = time_steps(1)
- h_maxz%density = time_steps(grid**dimen)
- istat = cudaMemcpy(time_steps, allfluidnext%energy, grid**dimen, cudaMemcpyDeviceToDevice)
- call thrustsort(time_steps,size(time_steps))
- h_minz%energy = time_steps(1)
- h_maxz%energy = time_steps(grid**dimen)
- do i = 1, dimen
- istat = cudaMemcpy(time_steps, allfluidnext%U(:,i), grid**dimen, cudaMemcpyDeviceToDevice)
- call thrustsort(time_steps,size(time_steps))
- h_minz%U(i) = time_steps(1)
- h_maxz%U(i) = time_steps(grid**dimen)
- enddo
- h_all = allfluidnext
- !save data to hard drive:
- ! build filename -- i.grd
- write(fn,fmt='(a,i4,a)') '1d_cuda_rho_', filenum, '.txt'
- ! open it with a fixed unit number
- open(unit=outunit,file=fn, form='formatted')
- ! write something
- do i = 1, outp%totalcells
- write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%density(i)
- enddo
- ! close it
- close(outunit)
- ! build filename -- i.grd
- write(fn,fmt='(a,i4,a)') '1d_cuda_p_', filenum, '.txt'
- ! open it with a fixed unit number
- open(unit=outunit,file=fn, form='formatted')
- ! write something
- do i = 1, outp%totalcells
- write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%energy(i)
- enddo
- ! close it
- close(outunit)
- ! build filename -- i.grd
- write(fn,fmt='(a,i4,a)') '1d_cuda_Vx_', filenum, '.txt'
- ! open it with a fixed unit number
- open(unit=outunit,file=fn, form='formatted')
- ! write something
- do i = 1, outp%totalcells
- write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%U(i,1)
- enddo
- ! close it
- close(outunit)
- filenum = filenum + 1
- save_time = save_time + t_save
- endif
- !print *, 'current_time, h_tau', current_time, h_tau
- !print *, 'iter', iter
- current_time = current_time + h_tau
- iter = iter + 1.0
- enddo
- end subroutine
- function host_ftheta_k(indx, inp)
- real(kind=prk) :: host_ftheta_k, coord
- type(Parameters) :: inp
- integer :: indx
- coord = inp%smoothing_length / 2.0 + real(indx, kind=prk) * inp%smoothing_length
- if (indx < inp%totalcells/3 .AND. indx > inp%totalcells/6) then
- ! ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
- host_ftheta_k = inp%ta * coord ** 2 + inp%tb * coord + inp%tc
- else
- host_ftheta_k = 0.0
- endif
- ! ftheta_k = 0.0
- end function host_ftheta_k
- attributes(global) subroutine minmax(in, outmin, outmax)
- type(Fluids), device :: in
- type(Fluidstemp), device :: outmax, outmin
- integer i
- !find min and max values
- outmin%density = minval(in%density(:))
- outmin%energy = minval(in%energy(:))
- outmax%density = maxval(in%density(:))
- outmax%energy = maxval(in%energy(:))
- do i = 1, params%dimensions
- outmin%U(i) = minval(in%U(:,i))
- outmax%U(i) = maxval(in%U(:,i))
- enddo
- end subroutine
- attributes(global) subroutine integral_values(out, in)
- type(Fluids), device :: out, in
- real(kind=prk) :: tmp(dimen)
- integer :: i, j, k, idx
- tmp = 0.0
- i = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
- j = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
- k = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
- idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
- !density:
- out%density(idx) = in%density(idx)
- !pressure:
- if (in%density(idx) > params%eps) then
- !velocity:
- tmp(1:params%dimensions) = in%U(idx,:) / in%density(idx)
- out%U(idx,:) = tmp(1:params%dimensions)
- else
- tmp = 0.0
- out%U(idx,:) = 0.0
- endif
- out%energy(idx) = (params%gamma - 1.0) * (in%energy(idx) - 0.5 * in%density(idx) * dot_product(tmp, tmp))
- if (out%energy(idx) < params%eps) then
- out%energy(idx) = 0.0
- endif
- end subroutine
- subroutine fthetacoef(inp)
- real(kind=prk) :: minus
- real(kind=prk) :: x1, x2, x3, y1, y2, y3
- type(Parameters) :: inp
- minus = 0.0
- !calc x1-x3,y1-y3:
- x1 = inp%smoothing_length / 2.0 + (inp%totalcells / 6.0) * inp%smoothing_length
- x3 = inp%smoothing_length / 2.0 + (inp%totalcells / 3.0) * inp%smoothing_length
- x2 = (x3 + x1) / 2.0
- y1 = 0.0
- y2 = minus
- y3 = 0.0
- print *, x1, x2, x3
- print *, y1, y2, y3
- !calc coef:
- inp%ta = (y3 - (x3 * (y2 - y1) + x2 * y1 - x1 * y2) / (x2 - x1) ) / (x3 * (x3 - x1 - x2) + x1 * x2)
- inp%tb = (y2 - y1) / (x2 - x1) - inp%ta * (x1 + x2)
- inp%tc = (x2 * y1 - x1 * y2) / (x2 - x1) + inp%ta * x1 * x2
- end subroutine
- end module CUDASphTvd
- program GPU_cSPH_TVD
- use CUDASphTvd
- use cudafor
- use thrust
- implicit none
- type(Parameters) :: inparams
- type(test), allocatable, device :: z(:)
- real, allocatable, device :: zz(:)
- type(test) :: h_z
- type(test), device :: d_z
- integer :: istat
- ! ! cuda events for elapsing
- ! type ( cudaEvent ) :: startEvent , stopEvent
- ! real :: time, random
- ! integer :: istat
- !
- ! ! Create events
- ! istat = cudaEventCreate ( startEvent )
- ! istat = cudaEventCreate ( stopEvent )
- !
- !
- ! istat = cudaEventRecord ( startEvent , 0)
- ! call thrustsort(gpuData,size(gpuData))
- !
- ! istat = cudaEventRecord ( stopEvent , 0)
- ! istat = cudaEventSynchronize ( stopEvent )
- ! istat = cudaEventElapsedTime ( time , startEvent , stopEvent )
- !
- ! print *," Sorted array in:",time," (ms)"
- inparams%smoothing_length = 0.5
- inparams%dx = 0.5
- inparams%courant = 0.0001
- inparams%eps = 0.0000001
- ! host%gamma = 1.5
- inparams%gamma = 1.5
- inparams%rho_0 = 1
- inparams%c_0 = 1
- inparams%p_0 = 1
- inparams%dimensions = dimen
- inparams%Nx = grid
- inparams%Ny = abs(dimen > 1) * grid + abs(dimen /= 2)
- inparams%Nz = abs(dimen > 2) * grid + abs(dimen /= 3)
- inparams%cnst = 10.0 / 7.0 / 3.1415926535897932384626433832795
- inparams%totalcells = inparams%Nx * inparams%Ny * inparams%Nz
- inparams%neighbourscount = 3 ** inparams%dimensions
- if (inparams%dimensions == 1) then
- inparams%dim3d = 2; inparams%dim2d = 2
- inparams%Wnx = 0; inparams%Wny = 0
- else if (inparams%dimensions == 2) then
- inparams%dim3d = 2; inparams%dim2d = 0
- inparams%Wnx = 3; inparams%Wny = 0
- else if (inparams%dimensions == 3) then
- inparams%dim3d = 0; inparams%dim2d = 0
- inparams%Wnx = 3; inparams%Wny = 3
- endif
- ! print *, "dbg!"
- call calculateSmoothingGradientW0(inparams)
- ! print *, "dbg!"
- call fthetacoef(inparams)
- print *, inparams%ta, inparams%tb, inparams%tc
- params = inparams
- allocate(allfluid, allfluidhalf, allfluidnext, allforces)
- allocate(allfluxes(dimen))
- allocate(time_steps(inparams%totalcells))
- allocate(boolflags(inparams%totalcells))
- allocate(tau)
- print *, "START!"
- call cuda_run(0.01, 30.0, 0.0)
- print *, "DONE CUDA!"
- !Mach : |u|/Cs
- end program GPU_cSPH_TVD
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement