• API
• FAQ
• Tools
• Trends
• Archive
daily pastebin goal
78%
SHARE
TWEET

# Untitled

a guest Dec 25th, 2011 172 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. !  coursework_2011.cuf
2. !
3. !  FUNCTIONS:
4. !  COURSEW      - Entry point of console application.
5. !
6. !
7. !
8. !****************************************************************************
9. !
10. !  PROGRAM:  GPU_cSPH_TVD
11. !
12. !  PURPOSE:  Use GPU computing for cSPH-TVD method
13. !
14. !****************************************************************************
15.
16. module thrust
17.
18. interface thrustsort
19. subroutine sort_int( input,N) bind(C,name="sort_int_wrapper")
20. use iso_c_binding
21. integer(c_int),device:: input(*)
22. integer(c_int),value:: N
23. end subroutine
24.
25. subroutine sort_float( input,N) bind(C,name="sort_float_wrapper")
26. use iso_c_binding
27. real(c_float),device:: input(*)
28. integer(c_int),value:: N
29. end subroutine
30.
31. subroutine sort_double( input,N) bind(C,name="sort_double_wrapper")
32. use iso_c_binding
33. real(c_double),device:: input(*)
34. integer(c_int),value:: N
35. end subroutine
36.
37. end interface
38.
39. end module thrust
40.
41. module fluid
42.   use iso_c_binding
43.   implicit none
44.   integer, parameter :: dimen = 1
45.   integer, parameter :: grid  = 512
46.   integer, parameter :: prk   = 4
47.   type Fluids
48.     ! density = P, energy = e
49.     ! U = (U_x, U_y, U_z)
50.     ! r = (x, y, z)
51.     real(kind=prk) :: density(grid**dimen), energy(grid**dimen)
52.     real(kind=prk) :: U(grid**dimen,dimen) !U_x(:), U_y(:), U_z(:) ! speed components
53.     real(kind=prk) :: r(grid**dimen,dimen) !x(:), y(:), z(:)
54.   end type
55.
56.   type test
57.     real, allocatable :: p(:)
58.   end type
59.
60.   type Fluidstemp
61.     sequence
62.     real(kind=prk) :: z, energy, density
63.     real(kind=prk) :: U(dimen)
64.     real(kind=prk) :: r(dimen)
65.   end type
66.
67.   type Parameters
68.     real(kind=prk) dx ! center of cells = dx/2 + i*dx
69.     integer Nx, Ny, Nz ! dimension number of cells, example: Nx = 10, Ny = 15, Nz = 20 -> [10x15x20]
70.     integer N !total number of cells: [10x15x20] = 3000
71.     real(kind=prk) monaghan_multiplier, smoothing_length
72.     real(kind=prk) gamma, rho_0, c_0, p_0, cnst, eps, courant
73.     real(kind=prk) :: nablaW0(3 ** dimen, dimen)
74.     real(kind=prk) :: ta, tb, tc, minus
75.     integer totalcells, neighbourscount
76.     integer dimensions, dim2d, dim3d !1D, 2D or 3D
77.     integer WNx, WNy
78.   end type
79.
80. end module fluid
81.
82. module CUDASphTvd
83.   use cudafor
85.   use thrust
86.   use fluid
87.   integer, parameter :: BLOCK_SIZE=16
88.   type(Fluids), allocatable, device :: allfluid, allfluidnext, allfluidhalf, allforces !allfluidhalf = (allfluid+allfluidnext) / 2
89.   type(Fluids), allocatable, device :: allfluxes(:)
90.   real(kind=prk), allocatable, device :: time_steps(:)
91.   logical, allocatable, device :: boolflags(:)
92.   type(Parameters), constant :: params
93.   real(kind=prk), allocatable, device :: tau
94.   real(kind=prk), allocatable, device :: tautemp(:)
95.   contains
96.
97.   attributes(global) subroutine cuda_calculate_forces_sph_predictor(in, outforce)
98.     type(Fluids), device :: in, outforce
99.     integer :: i, j, k, idx, idx_k
100.     !temporary variables for speed-up:
101.     real(kind=prk) :: H_i, H_k, sqrU_i
102.     !temporary 'fluid' variables:
103.     real(kind=prk) :: i_density, i_energy, theta_k, tmp
104.     real(kind=prk) :: Ui(dimen), i_impulse(dimen)
105.     real(kind=prk) :: Uk(dimen), tmpW(dimen)
106.
107.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
108.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
109.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
110.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
111.
112.     sqrU_i = 0.0
113.     !calculate Fi:
114.     !get fluid_i characteristics (save in temp):
115.     if (in%density(idx) > params%eps) then
116.       Ui(1:params%dimensions) = in%U(idx,:) / in%density(idx)
117.     else
118.       Ui = 0.0
119.     endif
120.
121.     i_density = in%density(idx)
122.     i_energy  = in%energy(idx)
123.     i_impulse = 0.0
124.
125.     !calculate H_i
126.     sqrU_i  = dot_product(Ui, Ui)
127.     tmp     = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
128.     if (tmp > params%eps) then
129.       H_i   = sqrt(tmp)
130.     else
131.       H_i   = 0.0
132.     endif
133. !print *, "H_i", tmp
134.     !calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
135.     H_k     = 0.0
136.     theta_k = 0.0
137.     i_energy= 0.0
138.
139.     !sum_{k} {Ui +Uk} = sum_{k} {Ui} + sum_{k} {Uk} = k * Ui + sum_{k} {Uk} :
140.     do i_k = i - 1, i + 1
141.       if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
142.         do j_k = j - 1, j + 1
143.           if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
144.             do k_k = k - 1, k + 1
145.               if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
146.                 idx_k     = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
147.
148.                 if (in%density(idx_k) > params%eps) then
149.                   Uk(1:params%dimensions) = in%U(idx_k,:) / in%density(idx_k)
150.                 else
151.                   Uk                      = 0.0
152.                 endif
153.                 sqrU_i    = dot_product(Uk, Uk)
154.                 tmp       = 2.0 * ( (params%gamma - 1.0) * (in%energy(idx_k) - (in%density(idx_k) * sqrU_i) / 2.0))
155.                 if (tmp > params%eps) then
156.                   H_k     = sqrt(tmp)
157.                 else
158.                   H_k     = 0.0
159.                 endif
160.                 tmpW(1:params%dimensions) = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
161.
162.                 if (H_i > params%eps) then
163.                   i_impulse = i_impulse - tmpW * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
164.                   i_energy  = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0 - ftheta_k(idx_k) * dot_product(in%U(idx,:), tmpW) / H_i
165.                 else
166.                   i_impulse = i_impulse - tmpW * H_k
167.                   i_energy  = i_energy - H_k * dot_product(Ui + Uk, tmpW) / 2.0
168.                 endif
169.
170.               endif
171.             enddo
172.           endif
173.         enddo
174.       endif
175.     enddo
176.
177.
178.     !calculate predictor forces:
179.     i_impulse          = H_i * i_impulse
180.     i_energy           = H_i * i_energy
181.     outforce%density(idx) = 0.0
182.     if (i_density > params%eps) then
183. !print *, "i_impulse, i_energy", i_impulse, i_energy
184.       outforce%U(idx,:)     = i_impulse(1:params%dimensions)
185.       outforce%energy(idx)  = i_energy
186.     else
187.       outforce%U(idx,:)     = 0.0
188.       outforce%energy(idx)  = 0.0
189.     endif
190.
191.   end subroutine
192.
193.   attributes(device) function ftheta_k(indx)
194.     real(kind=prk) :: ftheta_k, coord
195.     integer :: indx
196.
197.     coord = params%smoothing_length / 2.0 + real(indx, kind=prk) * params%smoothing_length
198.
199.     if (indx < params%totalcells/3 .AND. indx > params%totalcells/6) then
200. !      ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
201.       ftheta_k = params%ta * coord ** 2 + params%tb * coord + params%tc
202.     else
203.       ftheta_k = 0.0
204.     endif
205.       ftheta_k = 0.0
206.   end function ftheta_k
207.
208.   attributes(global) subroutine cuda_calculate_dt(in, outdt)
209.     type(Fluids), device :: in
210.     real(kind=prk), dimension(:), device :: outdt
211.     real(kind=prk) :: Cs
212.     integer :: idx, i, j, k
213.
214.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
215.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
216.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
217.
218.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
219.     outdt(idx) = 10000000.0
220.     if (in%density(idx) > params%eps) then
221.       Cs = params%gamma * (params%gamma - 1.0) * (in%energy(idx) - in%density(idx)*dot_product(in%U(idx,:) / in%density(idx),in%U(idx,:) / in%density(idx)) / 2.0) / in%density(idx)
222.       Cs = (abs(Cs > params%eps) * sqrt(abs(Cs)))
223.       outdt(idx) = params%courant * min(params%smoothing_length / (2.0 * maxval(abs(in%U(idx,:) / in%density(idx)))), params%smoothing_length / (maxval(abs(in%U(idx,:) / in%density(idx))) + Cs))
224.     endif
225.
226.   end subroutine cuda_calculate_dt
227.
228.   attributes(global) subroutine cuda_calculate_predictor_sph(in, inforce, outhalf, tau)
229.     integer idx, i, j, k
230.     !temporary 'fluid' variables:
231.     real(kind=prk), device :: tau
232.     type(Fluids), device :: in, inforce, outhalf
233.     !formula 16:
234.
235.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
236.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
237.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
238.
239.     !calculate H_i:
240.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
241.     !calculate predictor based on calculated forces inb4:
242.
243.     outhalf%density(idx) = in%density(idx) + (tau / 2.0) * inforce%density(idx)
244.     if (in%density(idx) > params%eps) then
245.       outhalf%U(idx,:)     = in%U(idx,:) + tau * inforce%U(idx,:) / 2.0
246.       outhalf%energy(idx)  = in%energy(idx) + tau * inforce%energy(idx) / 2.0
247.       outhalf%r(idx,:)     = in%r(idx,:) + tau * ( in%U(idx,:) / in%density(idx) + outhalf%U(idx,:)  / outhalf%density(idx) ) / 4.0
248.     else
249.       outhalf%U(idx,:)     = 0.0
250.       outhalf%energy(idx)  = 0.0
251.       outhalf%r(idx,:)     = in%r(idx,:)
252.     endif
253.
254.   end subroutine cuda_calculate_predictor_sph
255.
256.   attributes(global) subroutine cuda_calculate_forces_sph_corrector(in, inhalf, outnext)
257.     integer idx, idx_k, i, j, k, i_k, j_k, k_k
258.     !temporary variables for speed-up:
259.     real(kind=prk) H_i, H_k, sqrU_i
260.     !temporary 'fluid' variables:
261.     real(kind=prk) i_density, i_energy, theta_k, tmp
262.     real(kind=prk) :: Ui(dimen), ri(dimen), i_impulse(dimen)
263.     real(kind=prk) :: Uk(dimen), rk(dimen), Wr(dimen), tmpW(dimen)
264.     type(Fluids), device :: in, inhalf, outnext
265.     !formula 16:
266.
267.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
268.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
269.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
270.
271.     !calculate H_i:
272.     sqrU_i = 0.0
273.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
274.     !calculate Fi:
275.     !get fluid_i characteristics (save in temp):
276.     if (inhalf%density(idx) > params%eps) then
277.       Ui(1:params%dimensions) = inhalf%U(idx,:) / inhalf%density(idx)
278.     else
279.       Ui = 0.0
280.     endif
281.     ri(1:params%dimensions) = inhalf%r(idx,:)
282.     i_density = inhalf%density(idx)
283.     i_energy  = inhalf%energy(idx)
284.     i_impulse = 0.0
285.
286.     !calculate H_i
287.     sqrU_i  = dot_product(Ui, Ui)
288.     tmp     = 2.0 * ( (params%gamma - 1.0) * (i_energy - (i_density * sqrU_i) / 2.0) )
289.     if (tmp > params%eps) then
290.       H_i     = sqrt(tmp)
291.     else
292.       H_i     = 0.0
293.     endif
294.
295.     !calculate sum(W_{ik}) and H_k and sum(Ui+Uk):
296.     H_k     = 0.0
297.     theta_k = 0.0
298.     i_energy= 0.0
299.
300.     do i_k = i - 1, i + 1
301.       if ( (i_k >= 0) .AND. (i_k < params%Nz) ) then
302.         do j_k = j - 1, j + 1
303.           if ( (j_k >= 0) .AND. (j_k < params%Ny) ) then
304.             do k_k = k - 1, k + 1
305.               if ( (k_k >= 0) .AND. (k_k < params%Nx) ) then
306.                 idx_k     = params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
307.                 if (inhalf%density(idx_k) > params%eps) then
308.                   Uk(1:params%dimensions) = inhalf%U(idx_k,:) / inhalf%density(idx_k)
309.                 else
310.                   Uk                      = 0.0
311.                 endif
312.                 sqrU_i    = dot_product(Uk, Uk)
313.                 tmp       = 2.0 * ( (params%gamma - 1.0) * (inhalf%energy(idx_k) - (inhalf%density(idx_k) * sqrU_i) / 2.0) )
314.                 if (tmp > params%eps) then
315.                   H_k     = sqrt(tmp)
316.                 else
317.                   H_k     = 0.0
318.                 endif
319.                 rk(1:params%dimensions) = inhalf%r(idx_k,:)
321.                 tmpW      = params%nablaW0(params%WNx * params%WNy * (i_k - i + 1) + params%WNx * (j_k - j + 1) + k_k - k + 2,:)
322.
323.                 if (H_i > params%eps) then
324.                   i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k - tmpW * i_density / H_i * ftheta_k(idx_k)
325.                   i_energy  = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0 - ftheta_k(idx_k) * dot_product(inhalf%U(idx,:), tmpW) / H_i
326.                 else
327.                   i_impulse(1:params%dimensions) = i_impulse(1:params%dimensions) - Wr(1:params%dimensions) * H_k
328.                   i_energy  = i_energy - H_k * dot_product(Ui + Uk, Wr) / 2.0
329.                 endif
330.
331.               endif
332.             enddo
333.           endif
334.         enddo
335.       endif
336.     enddo
337.
338.     !calculate corrector forces:
339.     i_impulse(1:params%dimensions) = H_i * i_impulse(1:params%dimensions)
340.     i_energy                       = H_i * i_energy
341.
343.
344.     outnext%density(idx)   = in%density(idx) + 0.0 ! + 0.0 * (tau / 2.0)
345.     if (i_density > params%eps) then
346.       outnext%U(idx,:)     = in%U(idx,:) + tau * i_impulse(1:params%dimensions)
347.       outnext%energy(idx)  = in%energy(idx) + tau * i_energy
348.       outnext%r(idx,:)     = in%r(idx,:) + tau * ( in%U(idx,:) / i_density + outnext%U(idx,:) / i_density ) / 2.0
349.     else
350.       outnext%U(idx,:)     = 0.0
351.       outnext%energy(idx)  = 0.0
352.       outnext%r(idx,:)     = in%r(idx,:)
353.     endif
354.
355.
356. !calculate q(n+1/2):
357.
358. !     inhalf%density(idx) = ( in%density(idx) + outnext%density(idx) ) / 2.0
359. !     inhalf%energy(idx)  = ( in%energy(idx) + outnext%energy(idx) ) / 2.0
360. !     inhalf%r(idx,:)     = ( in%r(idx,:) + outnext%r(idx,:) ) / 2.0
361. !     inhalf%U(idx,:)     = ( in%U(idx,:) + outnext%U(idx,:) ) / 2.0
362.
363.   end subroutine cuda_calculate_forces_sph_corrector
364.
365.   attributes(global) subroutine cuda_calculate_fluidhalf(in, innext, outhalf)
366.     type(Fluids), device :: in, outhalf, innext
367.     integer :: i, j, k, idx
368. !calculate q(n+1/2):
369.
370.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
371.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
372.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
373.
374.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
375.     outhalf%density(idx) = ( in%density(idx) + innext%density(idx) ) / 2.0
376.     outhalf%energy(idx)  = ( in%energy(idx) + innext%energy(idx) ) / 2.0
377.     outhalf%r(idx,:)     = ( in%r(idx,:) + innext%r(idx,:) ) / 2.0
378.     outhalf%U(idx,:)     = ( in%U(idx,:) + innext%U(idx,:) ) / 2.0
379.
380.   end subroutine cuda_calculate_fluidhalf
381.
382.   attributes(global) subroutine cuda_calculate_flux_tvd(in, inhalf, outq0, outfluxes, outflags)
383.     integer idx, idx_k, i, j, k, i_k, j_k, k_k, dims, dim2dim, dim3dim
384.     integer, dimension(:) :: offset(3), ijk(3)
385.     logical, dimension(:), device :: outflags
386.     type(Fluids), device :: in, inhalf, outq0 !in = n+1 from corrector !outq0 = n
387.     type(Fluids), device :: outfluxes(dimen)
388.     type(Fluidstemp) :: neighbrs(3 + (dimen - 1) * 2), fluidhalf(4), ql, qr, Fl, Fr
389.     real(kind=prk) :: dr1, dr2, dr3, L, pl, Cl
390.     real(kind=prk) :: Vrl(dimen), Vrr(dimen)
391.     real(kind=prk) :: aa
392. !    print *, "calculate_flux_tvd"
393. !    neighbours = 3 + (params%dimensions - 1) * 2
394. !    print *, "calculate_flux_tvd debug"
395.     neighbrs%density  = 0.0
396.     neighbrs%energy   = 0.0
397.     fluidhalf%density = 0.0
398.     fluidhalf%energy  = 0.0
399.
400.     do i = 1, params%dimensions
401.       neighbrs(:)%r(i)  = 0.0
402.       neighbrs(:)%U(i)  = 0.0
403.       fluidhalf(:)%r(i) = 0.0
404.       fluidhalf(:)%U(i) = 0.0
405.     enddo
406.     offset(1)         = 1
407.     offset(2)         = params%Nx
408.     offset(3)         = params%Nx * params%Ny
409.     dim2dim           = abs(params%dimensions == 1) * 1
410.     dim3dim           = abs(params%dimensions <  3) * 1
411.
412.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
413.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
414.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
415.
416.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
417.
418.  do dims = 1, params%dimensions
419.               outfluxes(dims)%density(idx) = 0.0
420.               outfluxes(dims)%energy(idx)  = 0.0
421.               outfluxes(dims)%U(idx,:)     = 0.0
422.  enddo
423.
424.     !for each particle do:
425.     if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-2 + dim3dim) ) then
426.       if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-2 + dim2dim) ) then
427.         if ( (k >= 2) .AND. (k <= params%Nx-2) ) then
428.
429.           ijk(1) = k; ijk(2) = j; ijk(3) = i;
430.
431.           !get center cell (we will calculate fluxes relatively this cell):
432. !    print *, "idx, ", idx
433. !    print *, "ijk, ", ijk
434.
435.           !take all neighbours (1d: 3, 2d: 5(cross), 3d: 7(3d-cross)):
436.           if ( (k - 1 >= 0) .AND. (k - 1 < params%Nx) ) then
437.             neighbrs(1)%density = in%density(idx-1)
438.             neighbrs(1)%energy  = in%energy(idx-1)
439.             neighbrs(1)%r(:)    = in%r(idx-1,:)
440.             neighbrs(1)%U(:)    = in%U(idx-1,:)
441.           endif
442.           neighbrs(2)%density = in%density(idx)
443.           neighbrs(2)%energy  = in%energy(idx)
444.           neighbrs(2)%r(:)     = in%r(idx,:)
445.           neighbrs(2)%U(:)     = in%U(idx,:)
446.           if ( (k + 1 >= 0) .AND. (k + 1 < params%Nx) ) then
447.             neighbrs(3)%density = in%density(idx+1)
448.             neighbrs(3)%energy  = in%energy(idx+1)
449.             neighbrs(3)%r(:)     = in%r(idx+1,:)
450.             neighbrs(3)%U(:)     = in%U(idx+1,:)
451.           endif
452.
453.           !add neighbours if dim > 1:
454.           if ( params%dimensions > 1 ) then
455.             if ( (j - 1 >= 0) .AND. (j - 1 < params%Ny) ) then
456.               idx_k = idx - params%Nx !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
457.               neighbrs(4)%density = in%density(idx_k)
458.               neighbrs(4)%energy  = in%energy(idx_k)
459.               neighbrs(4)%r(:)     = in%r(idx_k,:)
460.               neighbrs(4)%U(:)     = in%U(idx_k,:)
461.             endif
462.             if ( (j + 1 >= 0) .AND. (j + 1 < params%Ny) ) then
463.               idx_k = idx + params%Nx
464.               neighbrs(5)%density = in%density(idx_k)
465.               neighbrs(5)%energy  = in%energy(idx_k)
466.               neighbrs(5)%r(:)     = in%r(idx_k,:)
467.               neighbrs(5)%U(:)     = in%U(idx_k,:)
468.             endif
469.           endif
470.
471.           !add neighbours if dim > 2:
472.           if ( params%dimensions > 2 ) then
473.             if ( (i - 1 >= 0) .AND. (i - 1 < params%Nz) ) then
474.               idx_k = idx - params%Nx * params%Ny !params%Ny * params%Nx * i_k + params%Nx * j_k + k_k + 1
475.               neighbrs(6)%density = in%density(idx_k)
476.               neighbrs(6)%energy  = in%energy(idx_k)
477.               neighbrs(6)%r(:)     = in%r(idx_k,:)
478.               neighbrs(6)%U(:)     = in%U(idx_k,:)
479.             endif
480.             if ( (i + 1 >= 0) .AND. (i + 1 < params%Nz) ) then
481.               idx_k = idx + params%Nx * params%Ny
482.               neighbrs(7)%density = in%density(idx_k)
483.               neighbrs(7)%energy  = in%energy(idx_k)
484.               neighbrs(7)%r(:)     = in%r(idx_k,:)
485.               neighbrs(7)%U(:)     = in%U(idx_k,:)
486.             endif
487.           endif
488.           !Okay, now lets decide if we need to calculate fluxes (check density):
489. !take components based on current dimension:
490.           if (maxval(neighbrs%density) > params%eps) then
491.             outflags(idx) = .TRUE.
492.             do dims = 1, params%dimensions
493.
494.               do i_k = 0, 3
495.                 fluidhalf(i_k+1)%density = inhalf%density(idx + offset(dims) * (i_k - 2))
496.                 fluidhalf(i_k+1)%energy  = inhalf%energy(idx + offset(dims) * (i_k - 2))
497.                 fluidhalf(i_k+1)%U(:)    = inhalf%U(idx + offset(dims) * (i_k - 2),:)
498.                 fluidhalf(i_k+1)%r(:)    = inhalf%r(idx + offset(dims) * (i_k - 2),:)
499.               enddo
500.
501.   !calc ql:
502.
503.               dr1 = fluidhalf(2)%r(dims) - fluidhalf(1)%r(dims)
504.               dr2 = fluidhalf(3)%r(dims) - fluidhalf(2)%r(dims)
505.               dr3 = fluidhalf(4)%r(dims) - fluidhalf(3)%r(dims)
506.
507.               if (fluidhalf(2)%density > params%eps) then
508.                 drx = (params%smoothing_length / 2.0 - (fluidhalf(2)%r(dims) - (params%dx/2 + (ijk(dims) - 1) * params%dx)) / 2.0)
509.
510.                 call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(2)%density - fluidhalf(1)%density) / dr1, L)
511.                 ql%density = fluidhalf(2)%density + drx * L
512.
513.                 call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(2)%energy - fluidhalf(1)%energy) / dr1, L)
514.                 ql%energy = fluidhalf(2)%energy + drx * L
515.                 do j_k = 1, params%dimensions
516.                   call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(2)%U(j_k) - fluidhalf(1)%U(j_k)) / dr1, L)
517.                   ql%U(j_k) = fluidhalf(2)%U(j_k) + drx * L
518.                 enddo
519.
520. !               Vrl = ql%U(1,:) !Vrl(1) = Vxl, Vrl(2) = Vyl, Vrl(3) = Vzl
521.                 if (ql%density > params%eps) then
522.                   Vrl = ql%U(:) / ql%density
523.                   pl  = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
524.                   Cl  = sqrt(params%gamma * pl / ql%density)
525.                 else
526.                   Vrl = 0.0
527.                   pl  = abs((params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0) > params%eps) * (params%gamma - 1.0) * (ql%energy - ql%density*dot_product(Vrl,Vrl) / 2.0)
528.                   Cl  = 0.0
529.                 endif
530.                 Fl%density = ql%U(dims)
531.                 Fl%U(:)    = ql%U(:) * Vrl(dims)
532.                 Fl%energy  = ql%energy * Vrl(dims)
533.               else
534.                 ql%density = 0.0
535.                 ql%energy  = 0.0
536.                 ql%U(:)    = 0.0
537.                 Fl%density = 0.0
538.                 Fl%energy  = 0.0
539.                 Fl%U(:)    = 0.0
540.                 Cl         = 0.0
541.                 Vrl        = 0.0
542.               endif
543. !    print *, "t_debug4"
544.
545.   !calc qr:
546.               if (fluidhalf(3)%density > params%eps) then
547.
548.                 drx = (params%smoothing_length / 2.0 + (fluidhalf(3)%r(dims) - (params%dx / 2.0 + ijk(dims) * params%dx)) / 2.0)
549.
550.                 call minmod((fluidhalf(3)%density - fluidhalf(2)%density) / dr2, (fluidhalf(4)%density - fluidhalf(3)%density) / dr3, L)
551.                 qr%density = fluidhalf(3)%density - drx * L
552.                 call minmod((fluidhalf(3)%energy - fluidhalf(2)%energy) / dr2, (fluidhalf(4)%energy - fluidhalf(3)%energy) / dr3, L)
553.                 qr%energy = fluidhalf(3)%energy - drx * L
554.
555.                 do j_k = 1, params%dimensions
556.                   call minmod((fluidhalf(3)%U(j_k) - fluidhalf(2)%U(j_k)) / dr2, (fluidhalf(4)%U(j_k) - fluidhalf(3)%U(j_k)) / dr3, L)
557.                   qr%U(j_k) = fluidhalf(3)%U(j_k) - drx * L
558.                 enddo
559.
560. !               Vrr = qr%U(1,:) !Vrr(1) = Vxr, Vrr(2) = Vyr, Vrr(3) = Vzr
561.                 if (qr%density > params%eps) then
562.                   Vrr = qr%U(:) / qr%density
563.                   pr  = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
564.                   Cr  = sqrt(params%gamma * pr / qr%density)
565.                 else
566.                   Cr  = 0.0
567.                   Vrr = 0.0
568.                   pr  = abs((params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0) > params%eps) * (params%gamma - 1.0) * (qr%energy - qr%density*dot_product(Vrr,Vrr) / 2.0)
569.                 endif
570.
571.                 Fr%density = qr%U(dims)
572.                 Fr%U(:)    = qr%U(:) * Vrr(dims)
573.                 Fr%energy  = qr%energy * Vrr(dims)
574.               else
575.                 qr%density = 0.0
576.                 qr%energy  = 0.0
577.                 qr%U(:)    = 0.0
578.                 Fr%density = 0.0
579.                 Fr%energy  = 0.0
580.                 Fr%U(:)    = 0.0
581.                 Cr         = 0.0
582.                 Vrr        = 0.0
583.               endif
584.   !calc Fx:
585.
586.               if ((Cl > params%eps) .OR. (Cr > params%eps)) then
587.                 Ll = min(Vrl(dims) - Cl, Vrr(dims) - Cr)
588.                 Lr = max(Vrr(dims) + Cr, Vrl(dims) + Cl)
589.                 bb = max(abs(Ll), abs(Lr))
590.                 outfluxes(dims)%density(idx) = ( Fr%density + Fl%density + bb * (ql%density - qr%density) ) / 2.0
591.                 outfluxes(dims)%energy(idx)  = ( Fr%energy  + Fl%energy  + bb * (ql%energy  - qr%energy)  ) / 2.0
592.                 outfluxes(dims)%U(idx,:)     = ( Fr%U(:)    + Fl%U(:)    + bb * (ql%U(:)    - qr%U(:))    ) / 2.0
593.               else
594.                 outfluxes(dims)%density(idx) = 0.0
595.                 outfluxes(dims)%energy(idx)  = 0.0
596.                 outfluxes(dims)%U(idx,:)     = 0.0
597.               endif
598.
599.             enddo
600.           else
601.             outflags(idx) = .FALSE.
602.             do dims = 1, params%dimensions
603.               outfluxes(dims)%density(idx) = 0.0
604.               outfluxes(dims)%energy(idx)  = 0.0
605.               outfluxes(dims)%U(idx,:)     = 0.0
606.             enddo
607.           endif
608. !           outfluxes(:)%density(idx) = 0.0
609.
610.         endif
611.       endif
612.     endif
613.
614.   end subroutine cuda_calculate_flux_tvd
615.
616.   attributes(global) subroutine cuda_finalize(outq0, in, influxes, inflags)
617.     type(Fluids), device :: in, outq0 !in = n+1 from corrector !outq0 = n
618.     type(Fluids), device :: influxes(dimen)
619.     type(Fluidstemp) :: fluidhalf(dimen)
620.     integer, dimension(:) :: offset(3)
621.     logical, dimension(:), device :: inflags
622.     integer :: dims, i, j, k, idx
623.
624.     offset(1)         = 1
625.     offset(2)         = params%Nx
626.     offset(3)         = params%Nx * params%Ny
627.
628.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
629.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
630.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
631.
632.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
633.
634.     if ( (i >= 2 - params%dim3d) .AND. (i <= params%Nz-3 + params%dim3d) ) then
635.       if ( (j >= 2 - params%dim2d) .AND. (j <= params%Ny-3 + params%dim2d) ) then
636.         if ( (k >= 2) .AND. (k <= params%Nx-3) ) then
637.
638.           if (inflags(idx)) then
639. !calculate all neighbour fluxes (idx + offset(dims)):
640.             do dims = 1, params%dimensions
641.               fluidhalf(dims)%density = influxes(dims)%density(idx + offset(dims))
642.               fluidhalf(dims)%energy  = influxes(dims)%energy(idx + offset(dims))
643.               fluidhalf(dims)%U(:)    = influxes(dims)%U(idx + offset(dims),:)
644.             enddo
645. !calculate final values:
646.             outq0%density(idx) = in%density(idx) + tau / params%smoothing_length * (sum(influxes(:)%density(idx)) - sum(fluidhalf(1:params%dimensions)%density))
647.             outq0%energy(idx)  = in%energy(idx)  + tau / params%smoothing_length * (sum(influxes(:)%energy(idx))  - sum(fluidhalf(1:params%dimensions)%energy))
648.             do dims = 1, params%dimensions
649.               outq0%U(idx,dims)  = in%U(idx,dims) + tau / params%smoothing_length * (sum(influxes(:)%U(idx,dims)) - sum(fluidhalf(1:params%dimensions)%U(dims)))
650.             enddo
651.
652.           else
653.             outq0%density(idx) = in%density(idx)
654.             outq0%energy(idx)  = in%energy(idx)
655.             outq0%U(idx,:)     = in%U(idx,:)
656.           endif
657.
658.         endif
659.       endif
660.     endif
661.   end subroutine cuda_finalize
662.
663.   attributes(device) subroutine minmod(a, b, out)
664.     real(kind=prk), device ::  a, b, out
665.     out = ( 1.0 / 2.0) * (sign(1.0, a) + sign(1.0, b)) * min(abs(a), abs(b))
666.   end subroutine minmod
667.
668.   subroutine cuda_setupSystem(inparams)
669.
670.   end subroutine
671.
672.   attributes(global) subroutine cuda_boundary_conditions(ptr)
673.     integer j, k, idx, idx_j, idx_k
674.     type(Fluids), device :: ptr
675.     integer ti, tj, tk, tidx
676.     ti   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
677.     tj   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
678.     tk   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
679.
680. !    idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
681.
682.     if (params%dimensions == 1) then
683.       ptr%density(1) = ptr%density(3)
684.       ptr%density(2) = ptr%density(1)
685.       ptr%density(params%Nx) = ptr%density(params%Nx - 2)
686.       ptr%density(params%Nx-1) = ptr%density(params%Nx)
687.       ptr%energy(1) = ptr%energy(3)
688.       ptr%energy(2) = ptr%energy(1)
689.       ptr%energy(params%Nx) = ptr%energy(params%Nx - 2)
690.       ptr%energy(params%Nx-1) = ptr%energy(params%Nx)
691.       ptr%U(1,1) = ptr%U(3,1)
692.       ptr%U(2,1) = ptr%U(1,1)
693.       ptr%U(params%Nx,1) = ptr%U(params%Nx - 2,1)
694.       ptr%U(params%Nx-1,1) = ptr%U(params%Nx,1)
695.     endif
696.
697.     if (params%dimensions == 2) then
698. ! copy x:
699.       if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
700. !      do k = 0, params%Nx-1
701.         idx   = k + 1
702.         idx_j = params%Nx + idx
703.         idx_k = 2 * params%Nx + idx
704.         ptr%density(idx) = ptr%density(idx_k)
705.         ptr%density(idx_j) = ptr%density(idx)
706.         ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
707.         ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
708.         ptr%energy(idx) = ptr%energy(idx_k)
709.         ptr%energy(idx_j) = ptr%energy(idx)
710.         ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
711.         ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
712.         ptr%U(idx,1) = ptr%U(idx_k,1)
713.         ptr%U(idx_j,1) = ptr%U(idx,1)
714.         ptr%U(idx,2) = ptr%U(idx_k,2)
715.         ptr%U(idx_j,2) = ptr%U(idx,2)
716.         ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
717.         ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
718.         ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
719.         ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
720.       endif
721. ! copy y:
722.       if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
723. !      do k = 0, params%Ny-1
724.         idx   = params%Nx * j + 1
725.         idx_j = idx + 1
726.         idx_k = idx + 2
727.         ptr%density(idx) = ptr%density(idx_k)
728.         ptr%density(idx_j) = ptr%density(idx)
729.         ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
730.         ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
731.         ptr%energy(idx) = ptr%energy(idx_k)
732.         ptr%energy(idx_j) = ptr%energy(idx)
733.         ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
734.         ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
735.         ptr%U(idx,1) = ptr%U(idx_k,1)
736.         ptr%U(idx_j,1) = ptr%U(idx,1)
737.         ptr%U(idx,2) = ptr%U(idx_k,2)
738.         ptr%U(idx_j,2) = ptr%U(idx,2)
739.         ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
740.         ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
741.         ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
742.         ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
743.       endif
744.     endif
745.
746.     if (params%dimensions == 3) then
747. !copy (for [y x z])
748.
749.       if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
750.         if ( (k >= 0) .AND. (k <= params%Ny-1) ) then
751.           idx   = params%Nx * params%Ny * j + params%Nx * k + 1
752.           idx_j = idx + 1
753.           idx_k = idx + 2
754.           ptr%density(idx) = ptr%density(idx_k)
755.           ptr%density(idx_j) = ptr%density(idx)
756.           ptr%density(params%Nx + idx - 1) = ptr%density(params%Nx + idx - 3)
757.           ptr%density(params%Nx + idx - 2) = ptr%density(params%Nx + idx - 1)
758.           ptr%energy(idx) = ptr%energy(idx_k)
759.           ptr%energy(idx_j) = ptr%energy(idx)
760.           ptr%energy(params%Nx + idx - 1) = ptr%energy(params%Nx + idx - 3)
761.           ptr%energy(params%Nx + idx - 2) = ptr%energy(params%Nx + idx - 1)
762.           ptr%U(idx,1) = ptr%U(idx_k,1)
763.           ptr%U(idx_j,1) = ptr%U(idx,1)
764.           ptr%U(idx,2) = ptr%U(idx_k,2)
765.           ptr%U(idx_j,2) = ptr%U(idx,2)
766.           ptr%U(idx,3) = ptr%U(idx_k,3)
767.           ptr%U(idx_j,3) = ptr%U(idx,3)
768.           ptr%U(params%Nx + idx - 1,1) = ptr%U(params%Nx + idx - 3,1)
769.           ptr%U(params%Nx + idx - 2,1) = ptr%U(params%Nx + idx - 1,1)
770.           ptr%U(params%Nx + idx - 1,2) = ptr%U(params%Nx + idx - 3,2)
771.           ptr%U(params%Nx + idx - 2,2) = ptr%U(params%Nx + idx - 1,2)
772.           ptr%U(params%Nx + idx - 1,3) = ptr%U(params%Nx + idx - 3,3)
773.           ptr%U(params%Nx + idx - 2,3) = ptr%U(params%Nx + idx - 1,3)
774.         endif
775.       endif
776.
777. !copy (for [x x z])
778.       if ( (j >= 0) .AND. (j <= params%Nz-1) ) then
779.         if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
780.           idx   = params%Nx * params%Ny * j + k + 1
781.           idx_j = idx + params%Nx
782.           idx_k = idx + 2 * params%Nx
783.           ptr%density(idx) = ptr%density(idx_k)
784.           ptr%density(idx_j) = ptr%density(idx)
785.           ptr%density((params%Ny - 1) * params%Nx + idx) = ptr%density((params%Ny - 3) * params%Nx + idx)
786.           ptr%density((params%Ny - 2) * params%Nx + idx) = ptr%density((params%Ny - 1) * params%Nx + idx)
787.           ptr%energy(idx) = ptr%energy(idx_k)
788.           ptr%energy(idx_j) = ptr%energy(idx)
789.           ptr%energy((params%Ny - 1) * params%Nx + idx) = ptr%energy((params%Ny - 3) * params%Nx + idx)
790.           ptr%energy((params%Ny - 2) * params%Nx + idx) = ptr%energy((params%Ny - 1) * params%Nx + idx)
791.           ptr%U(idx,1) = ptr%U(idx_k,1)
792.           ptr%U(idx_j,1) = ptr%U(idx,1)
793.           ptr%U(idx,2) = ptr%U(idx_k,2)
794.           ptr%U(idx_j,2) = ptr%U(idx,2)
795.           ptr%U(idx,3) = ptr%U(idx_k,3)
796.           ptr%U(idx_j,3) = ptr%U(idx,3)
797.           ptr%U((params%Ny - 1) * params%Nx + idx,1) = ptr%U((params%Ny - 3) * params%Nx + idx,1)
798.           ptr%U((params%Ny - 2) * params%Nx + idx,1) = ptr%U((params%Ny - 1) * params%Nx + idx,1)
799.           ptr%U((params%Ny - 1) * params%Nx + idx,2) = ptr%U((params%Ny - 3) * params%Nx + idx,2)
800.           ptr%U((params%Ny - 2) * params%Nx + idx,2) = ptr%U((params%Ny - 1) * params%Nx + idx,2)
801.           ptr%U((params%Ny - 1) * params%Nx + idx,3) = ptr%U((params%Ny - 3) * params%Nx + idx,3)
802.           ptr%U((params%Ny - 2) * params%Nx + idx,3) = ptr%U((params%Ny - 1) * params%Nx + idx,3)
803.         endif
804.       endif
805.
806. !copy (for [x x y])
807.       if ( (j >= 0) .AND. (j <= params%Ny-1) ) then
808.         if ( (k >= 0) .AND. (k <= params%Nx-1) ) then
809.           idx   = params%Nx * j + k + 1
810.           idx_j = idx + params%Nx * params%Ny
811.           idx_k = idx + params%Nx * params%Ny * 2
812.           ptr%density(idx) = ptr%density(idx_k)
813.           ptr%density(idx_j) = ptr%density(idx)
814.           ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 3) + idx)
815.           ptr%density(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%density(params%Nx * params%Ny * (params%Nz - 1) + idx)
816.           ptr%energy(idx) = ptr%energy(idx_k)
817.           ptr%energy(idx_j) = ptr%energy(idx)
818.           ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 3) + idx)
819.           ptr%energy(params%Nx * params%Ny * (params%Nz - 2) + idx) = ptr%energy(params%Nx * params%Ny * (params%Nz - 1) + idx)
820.           ptr%U(idx,1) = ptr%U(idx_k,1)
821.           ptr%U(idx_j,1) = ptr%U(idx,1)
822.           ptr%U(idx,2) = ptr%U(idx_k,2)
823.           ptr%U(idx_j,2) = ptr%U(idx,2)
824.           ptr%U(idx,3) = ptr%U(idx_k,3)
825.           ptr%U(idx_j,3) = ptr%U(idx,3)
826.           ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,1)
827.           ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,1) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,1)
828.           ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,2)
829.           ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,2) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,2)
830.           ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 3) + idx,3)
831.           ptr%U(params%Nx * params%Ny * (params%Nz - 2) + idx,3) = ptr%U(params%Nx * params%Ny * (params%Nz - 1) + idx,3)
832.         endif
833.       endif
834.
835.     endif
836.
837.   end subroutine
838.
839.   attributes(global) subroutine initial_conditions(allf)
840.     type(Fluids), device :: allf
841.     integer :: i, j, k, dims, ijk(3), N
842.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
843.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
844.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
845.     N   = params%Nx
846.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
847.     ijk(1) = k
848.     ijk(2) = j
849.     ijk(3) = i
850. !    print *, "idx", idx
851. !    print *, "ijk", i, j, k
852.
853.    if (idx >  (params%totalcells * 0.51) .AND. idx < (params%totalcells * 0.61)) then ! wave
854.       allfluid%density(idx) = 11.3
855.       allfluid%U(idx,:)     = -22.2
856.       allfluid%energy(idx)  = 45.0
857. !   if (idx <  (params%totalcells * 0.4)) then
858.    else if (idx >  (params%totalcells * 0.39) .AND. idx < (params%totalcells * 0.49)) then ! wave
859.       allfluid%density(idx) = 11.3
860.       allfluid%U(idx,:)     = 22.2
861.       allfluid%energy(idx)  = 45.0
862.    else
863.      allfluid%density(idx) = 11.3
864.      allfluid%U(idx,:)     = 0.0
865.      allfluid%energy(idx)  = 0.0
866.    endif
867.
868. !    else
869. !     allfluid%density(idx) = 20.0
870. !     allfluid%U(idx,:)     = 0.0
871. !     allfluid%energy(idx)  = 0.0
872. !    endif
873.
874.     do dims = 1, params%dimensions
875.       allf%r(idx,dims) = params%dx / 2.0 + ijk(dims) * params%dx
876.     enddo
877.   end subroutine
878.
880.     type(Parameters) :: inp
881.     real(kind=prk) dW, s
882.     real(kind=prk) dr_ik(inp%dimensions)
883.     integer i, j, k, i_max, j_max, k_max, dims, ijk(inp%dimensions)
884.     if ( inp%dimensions == 3 ) then
885.       i_max = 3; j_max = 3; k_max = 3;
886.     else if ( inp%dimensions == 2 ) then
887.       i_max = 1; j_max = 3; k_max = 3;
888.     else
889.       i_max = 1; j_max = 1; k_max = 3;
890.     endif
891.
892.    do i = 0, i_max-1
893.       do j = 0, j_max-1
894.         do k = 0, k_max-1
895.
896.           if (inp%dimensions == 1) then
897.             ijk(1) = k;
898.           else if (inp%dimensions == 2) then
899.             ijk(1) = j; ijk(2) = k;
900.           else
901.             ijk(1) = i; ijk(2) = j; ijk(3) = k;
902.           endif
903.
904.           do dims = 1, inp%dimensions
905.             dr_ik(dims) = inp%smoothing_length*(1-ijk(dims))
906.           enddo
907.
908.           s  = dot_product(dr_ik, dr_ik)
909.           s  = sqrt(s);
911.           inp%nablaW0(inp%WNx * inp%WNx * i + inp%WNx * j + k + 1, :) = dW * dr_ik
912.
913.         enddo
914.       enddo
915.     enddo
916.
917.   end subroutine
918.
920.     type(Parameters), intent(in) :: inp
921.     real(kind=prk) :: s, ss, dW, gradW
922.     ss = s / inp%smoothing_length
923.     if ((ss > 0) .AND. (ss < 1.0)) then
924.       dW = (-3.0 + 2.25 * ss ) * ss
925.     else if((ss < 2.0) .AND. (ss >= 1.0)) then
926.       dW = (2.0 - ss); dW = -0.75 * dW * dW
927.     else
928.       dW=0.0;
929.     endif
930.     if(s > inp%eps) then
931.       gradW = inp%cnst * dW / s / inp%smoothing_length
932.     else
934.     endif
936.
937.   attributes(device) subroutine calculateSmoothingGradientW(r_i, r_k, W)
938.     real(kind=prk) q, s
939.     real(kind=prk), device :: r_i(dimen), r_k(dimen), W(dimen)
940.     s = sqrt(SUM((r_i - r_k) ** 2))
941.     q = s / params%smoothing_length
942.     if (s > params%eps) then
943.       !if s (0;1]
944.       if ( (q > 0) .AND. (q <= 1.0) ) then
945.         W = (r_i - r_k) * params%cnst * ( -3.0 + 2.25 * q ) * q / s / params%smoothing_length
946.       !      W = params%cnst * (r_k - r_i) * (4.0 * params%smoothing_length - 3.0 * s)
947.       !if s (1;2] :
948.       else if ( (q > 1.0) .AND. (q <= 2.0) ) then
949.         W = (r_i - r_k) * params%cnst * ( -0.75 * (2.0 - q) ** 2 ) / s / params%smoothing_length
950.       !      W = params%cnst * (r_k - r_i) * ( (s - 2.0 * params%smoothing_length) ** 2 ) / s
951.       else
952.         W = 0.0
953.       endif
954.     else
955.         W = 0.0
956.     endif
957.   end subroutine
958.
959.
960.   subroutine cuda_run(t_save, t_end, t_step)
961.     real(kind=prk) :: t_save, t_end, t_step
962.     real(kind=prk) :: total_time, current_time, end_time, save_time
963.     real(kind=prk) :: iter, h_tau
965.     character(len=70) :: fn, tmp
966.     type(Fluids) :: h_all
967.     type(Fluidstemp), device :: minz, maxz
968.     type(Fluidstemp) :: h_minz, h_maxz
969.     type(Parameters) :: outp
970.  type ( cudaEvent ) :: startEvent , stopEvent
971.  real :: time, random, tmpa
972.  integer :: istat
973.     integer, parameter :: outunit=44
974.     integer :: filenum, i
975.     logical :: flag
976.     outp = params
977.     if (outp%dimensions == 1) then
978.     blocks = dim3(outp%Nx / BLOCK_SIZE, 1, 1)
979.     threads = dim3(BLOCK_SIZE, 1, 1)
980.     else if (outp%dimensions == 2) then
981.     blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, 1)
982.     threads = dim3(BLOCK_SIZE, BLOCK_SIZE, 1)
983.     else
984.     blocks = dim3(outp%Nx / BLOCK_SIZE, outp%Ny / BLOCK_SIZE, outp%Nz / BLOCK_SIZE)
985.     threads = dim3(BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE)
986.     endif
987.
988. !print *, "run!"
989.
990.     flag         = .FALSE.
991.     filenum      = 1
992.     current_time = 0.0
993.     save_time    = 0.0
994.     iter         = 1.0
995.     end_time     = t_end
996.
997.     time_steps = 1000000.0
999.
1000. !FTheta plot:
1001.     h_all = allfluid
1002.     ! build filename -- i.grd
1003.     write(fn,fmt='(a)') 'ftheta.txt'
1004.
1005.     ! open it with a fixed unit number
1006.     open(unit=outunit,file=fn, form='formatted')
1007.
1008.     ! write something
1009.     do i = 1, outp%totalcells
1010.       write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), host_ftheta_k(i, outp)
1011.     enddo
1012.     ! close it
1013.     close(outunit)
1014.
1015. !Calculate:
1016.
1017.     do while (current_time < end_time)
1018.
1019.     write(tmp,fmt='(a,i0,a)') '(',outp%Nx, 'F)'
1020.
1021. !      do while (iter < 6)
1022.
1024.
1026.
1027.       call thrustsort(time_steps,size(time_steps))
1028.
1029.       istat = cudaMemcpy(tau, time_steps(1), 1, cudaMemcpyDeviceToDevice)
1030.       h_tau = tau
1031.
1033.
1034.       call cuda_calculate_predictor_sph<<<blocks, threads>>>(allfluid, allforces, allfluidhalf, tau)
1035.
1037.
1038.       call cuda_calculate_forces_sph_corrector<<<blocks, threads>>>(allfluid, allfluidhalf, allfluidnext)
1039.
1041.
1042.       call cuda_calculate_fluidhalf<<<blocks, threads>>>(allfluid, allfluidnext, allfluidhalf)
1043.
1044.       call cuda_calculate_flux_tvd<<<blocks, threads>>>(allfluidnext, allfluidhalf, allfluid, allfluxes, boolflags)
1045.
1046. !       h_all  = allfluxes(1)
1047. ! print *, "Debug: density sum:", sum(abs(h_all%density))
1048. ! print *, "Debug: energy  sum:", sum(abs(h_all%energy))
1049. ! print *, "Debug: impulse sum:", sum(abs(h_all%U))
1050.
1051.       call cuda_finalize<<<blocks, threads>>>(allfluid, allfluidnext, allfluxes, boolflags)
1052.
1053.       if (current_time >= save_time) then
1054.
1056.
1058.
1059.         h_tau  = tau
1060.         h_all  = allfluidnext
1061. print *, "density sum:", sum(abs(h_all%density))
1062. print *, "energy  sum:", sum(abs(h_all%energy))
1063. print *, "impulse sum:", sum(abs(h_all%U))
1064.         print *, "current time:", current_time
1065.         print *, "save time:", save_time
1066.         print *, "dt:", h_tau
1067.         print *, "iteration:", iter
1068. random = 0.0
1069. do i = 1, outp%totalcells
1070.   if (h_all%density(i) > outp%eps) then
1071.       tmpa = outp%gamma * h_all%energy(i) / h_all%density(i)
1072.       if (tmpa > outp%eps) then
1073.         tmpa = sqrt(tmpa)
1074.         tmpa = sqrt(dot_product(h_all%U(i,:),h_all%U(i,:))) / tmpa
1075.       endif
1076.     if (tmpa > random) then
1077.       random = tmpa
1078.     endif
1079.   endif
1080. enddo
1081. print *, 'Mach: ', random
1082.
1083. !       call minmax<<<1,1>>>(allfluidnext, minz, maxz)
1084.         istat = cudaMemcpy(time_steps, allfluidnext%density, grid**dimen, cudaMemcpyDeviceToDevice)
1085.         call thrustsort(time_steps,size(time_steps))
1086.         h_minz%density = time_steps(1)
1087.         h_maxz%density = time_steps(grid**dimen)
1088.
1089.         istat = cudaMemcpy(time_steps, allfluidnext%energy, grid**dimen, cudaMemcpyDeviceToDevice)
1090.         call thrustsort(time_steps,size(time_steps))
1091.         h_minz%energy  = time_steps(1)
1092.         h_maxz%energy  = time_steps(grid**dimen)
1093.
1094.         do i = 1, dimen
1095.           istat = cudaMemcpy(time_steps, allfluidnext%U(:,i), grid**dimen, cudaMemcpyDeviceToDevice)
1096.           call thrustsort(time_steps,size(time_steps))
1097.           h_minz%U(i)    = time_steps(1)
1098.           h_maxz%U(i)    = time_steps(grid**dimen)
1099.         enddo
1100.
1101.         h_all  = allfluidnext
1102.
1103. !save data to hard drive:
1104.         ! build filename -- i.grd
1105.         write(fn,fmt='(a,i4,a)') '1d_cuda_rho_', filenum, '.txt'
1106.
1107.         ! open it with a fixed unit number
1108.         open(unit=outunit,file=fn, form='formatted')
1109.
1110.         ! write something
1111.         do i = 1, outp%totalcells
1112.           write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%density(i)
1113.         enddo
1114.         ! close it
1115.         close(outunit)
1116.
1117.         ! build filename -- i.grd
1118.         write(fn,fmt='(a,i4,a)') '1d_cuda_p_', filenum, '.txt'
1119.
1120.         ! open it with a fixed unit number
1121.         open(unit=outunit,file=fn, form='formatted')
1122.
1123.         ! write something
1124.         do i = 1, outp%totalcells
1125.           write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%energy(i)
1126.         enddo
1127.
1128.         ! close it
1129.         close(outunit)
1130.
1131.         ! build filename -- i.grd
1132.         write(fn,fmt='(a,i4,a)') '1d_cuda_Vx_', filenum, '.txt'
1133.
1134.         ! open it with a fixed unit number
1135.         open(unit=outunit,file=fn, form='formatted')
1136.
1137.         ! write something
1138.         do i = 1, outp%totalcells
1139.           write(outunit, '(F0.7,x,F0.7)') h_all%r(i,1), h_all%U(i,1)
1140.         enddo
1141.         ! close it
1142.         close(outunit)
1143.
1144.
1145.         filenum   = filenum + 1
1146.         save_time = save_time + t_save
1147.
1148.       endif
1149.
1150. !print *, 'current_time, h_tau', current_time, h_tau
1151. !print *, 'iter', iter
1152.
1153.       current_time = current_time + h_tau
1154.
1155.       iter = iter + 1.0
1156.
1157.     enddo
1158.
1159.
1160.
1161.   end subroutine
1162.
1163.   function host_ftheta_k(indx, inp)
1164.     real(kind=prk) :: host_ftheta_k, coord
1165.     type(Parameters) :: inp
1166.     integer :: indx
1167.
1168.     coord = inp%smoothing_length / 2.0 + real(indx, kind=prk) * inp%smoothing_length
1169.
1170.     if (indx < inp%totalcells/3 .AND. indx > inp%totalcells/6) then
1171. !      ftheta_k = -4.0 / ( Exp(2.0 * coord) + 2.0 + Exp(-2.0 * coord) )
1172.       host_ftheta_k = inp%ta * coord ** 2 + inp%tb * coord + inp%tc
1173.     else
1174.       host_ftheta_k = 0.0
1175.     endif
1176. !      ftheta_k = 0.0
1177.   end function host_ftheta_k
1178.
1179.   attributes(global) subroutine minmax(in, outmin, outmax)
1180.     type(Fluids), device :: in
1181.     type(Fluidstemp), device :: outmax, outmin
1182.     integer i
1183.     !find min and max values
1184.     outmin%density = minval(in%density(:))
1185.     outmin%energy  = minval(in%energy(:))
1186.     outmax%density = maxval(in%density(:))
1187.     outmax%energy  = maxval(in%energy(:))
1188.     do i = 1, params%dimensions
1189.       outmin%U(i)   = minval(in%U(:,i))
1190.       outmax%U(i)   = maxval(in%U(:,i))
1191.     enddo
1192.   end subroutine
1193.
1194.   attributes(global) subroutine integral_values(out, in)
1195.     type(Fluids), device :: out, in
1196.     real(kind=prk) :: tmp(dimen)
1197.     integer :: i, j, k, idx
1198.     tmp = 0.0
1199.
1200.     i   = threadIdx%z + (blockIdx%z - 1) * blockDim%z - 1
1201.     j   = threadIdx%y + (blockIdx%y - 1) * blockDim%y - 1
1202.     k   = threadIdx%x + (blockIdx%x - 1) * blockDim%x - 1
1203.
1204.     idx = params%Ny * params%Nx * i + params%Nx * j + k + 1
1205.
1206. !density:
1207.     out%density(idx) = in%density(idx)
1208. !pressure:
1209.     if (in%density(idx) > params%eps) then
1210. !velocity:
1211.       tmp(1:params%dimensions) = in%U(idx,:) / in%density(idx)
1212.       out%U(idx,:) = tmp(1:params%dimensions)
1213.     else
1214.       tmp = 0.0
1215.       out%U(idx,:) = 0.0
1216.     endif
1217.     out%energy(idx) = (params%gamma - 1.0) * (in%energy(idx) - 0.5 * in%density(idx) * dot_product(tmp, tmp))
1218.     if (out%energy(idx) < params%eps) then
1219.       out%energy(idx) = 0.0
1220.     endif
1221.   end subroutine
1222.
1223.   subroutine fthetacoef(inp)
1224.     real(kind=prk) :: minus
1225.     real(kind=prk) :: x1, x2, x3, y1, y2, y3
1226.     type(Parameters) :: inp
1227.     minus = 0.0
1228.
1229. !calc x1-x3,y1-y3:
1230.
1231.     x1 = inp%smoothing_length / 2.0 + (inp%totalcells / 6.0) * inp%smoothing_length
1232.     x3 = inp%smoothing_length / 2.0 + (inp%totalcells / 3.0) * inp%smoothing_length
1233.     x2 = (x3 + x1) / 2.0
1234.     y1 = 0.0
1235.     y2 = minus
1236.     y3 = 0.0
1237.
1238. print *, x1, x2, x3
1239. print *, y1, y2, y3
1240. !calc coef:
1241.
1242.     inp%ta = (y3 - (x3 * (y2 - y1) + x2 * y1 - x1 * y2) / (x2 - x1) ) / (x3 * (x3 - x1 - x2) + x1 * x2)
1243.     inp%tb = (y2 - y1) / (x2 - x1) - inp%ta * (x1 + x2)
1244.     inp%tc = (x2 * y1 - x1 * y2) / (x2 - x1) + inp%ta * x1 * x2
1245.
1246.   end subroutine
1247.
1248.
1249. end module CUDASphTvd
1250.
1251. program GPU_cSPH_TVD
1252.   use CUDASphTvd
1253.   use cudafor
1254.   use thrust
1255.   implicit none
1256.   type(Parameters) :: inparams
1257.   type(test), allocatable, device :: z(:)
1258.   real, allocatable, device :: zz(:)
1259.   type(test) :: h_z
1260.   type(test), device :: d_z
1261.   integer :: istat
1262.
1263. ! ! cuda events for elapsing
1264. ! type ( cudaEvent ) :: startEvent , stopEvent
1265. ! real :: time, random
1266. ! integer :: istat
1267. !
1268. ! ! Create events
1269. ! istat = cudaEventCreate ( startEvent )
1270. ! istat = cudaEventCreate ( stopEvent )
1271. !
1272. !
1273. ! istat = cudaEventRecord ( startEvent , 0)
1274. ! call thrustsort(gpuData,size(gpuData))
1275. !
1276. ! istat = cudaEventRecord ( stopEvent , 0)
1277. ! istat = cudaEventSynchronize ( stopEvent )
1278. ! istat = cudaEventElapsedTime ( time , startEvent , stopEvent )
1279. !
1280. ! print *," Sorted array in:",time," (ms)"
1281.
1282.   inparams%smoothing_length = 0.5
1283.   inparams%dx               = 0.5
1284.   inparams%courant          = 0.0001
1285.   inparams%eps              = 0.0000001
1286. !  host%gamma            = 1.5
1287.   inparams%gamma            = 1.5
1288.   inparams%rho_0            = 1
1289.   inparams%c_0              = 1
1290.   inparams%p_0              = 1
1291.   inparams%dimensions       = dimen
1292.   inparams%Nx               = grid
1293.   inparams%Ny               = abs(dimen > 1) * grid + abs(dimen /= 2)
1294.   inparams%Nz               = abs(dimen > 2) * grid + abs(dimen /= 3)
1295.   inparams%cnst             = 10.0 / 7.0 / 3.1415926535897932384626433832795
1296.   inparams%totalcells       = inparams%Nx * inparams%Ny * inparams%Nz
1297.   inparams%neighbourscount  = 3 ** inparams%dimensions
1298.
1299.   if (inparams%dimensions == 1) then
1300.     inparams%dim3d = 2; inparams%dim2d = 2
1301.     inparams%Wnx   = 0; inparams%Wny   = 0
1302.   else if (inparams%dimensions == 2) then
1303.     inparams%dim3d = 2; inparams%dim2d = 0
1304.     inparams%Wnx   = 3; inparams%Wny   = 0
1305.   else if (inparams%dimensions == 3) then
1306.     inparams%dim3d = 0; inparams%dim2d = 0
1307.     inparams%Wnx   = 3; inparams%Wny   = 3
1308.   endif
1309. !  print *, "dbg!"
1310.
1312. !  print *, "dbg!"
1313.
1314.   call fthetacoef(inparams)
1315. print *, inparams%ta, inparams%tb, inparams%tc
1316.
1317.   params = inparams
1318.
1319.   allocate(allfluid, allfluidhalf, allfluidnext, allforces)
1320.   allocate(allfluxes(dimen))
1321.   allocate(time_steps(inparams%totalcells))
1322.   allocate(boolflags(inparams%totalcells))
1323.   allocate(tau)
1324.
1325.   print *, "START!"
1326.   call cuda_run(0.01, 30.0, 0.0)
1327.   print *, "DONE CUDA!"
1328. !Mach : |u|/Cs
1329. end program GPU_cSPH_TVD
RAW Paste Data
Top