//N-Body simulation
//Algorithm based on: http://http.developer.nvidia.com/GPUGems3/gpugems3_ch31.html
3 | ||
// Number of threads per thread group. The same value sizes the group-shared
// position cache, so one tile holds exactly one position per thread.
#define BLOCK_SIZE 128
5 | ||
// Simulation parameters, bound to constant-buffer slot b0.
// The member order is the buffer layout; presumably it mirrors the structure
// the application uploads, so do not reorder - TODO confirm against the host code.
cbuffer data : register(b0)
{
    uint  num_particles; // particle count; threads whose index is >= this value write no result
    uint  num_blocks;    // number of BLOCK_SIZE-wide tiles the kernel walks through
    float deltaTime;     // time step used to integrate velocity and position
    float damping;       // NOTE(review): never read by CS() - confirm whether velocity damping was intended
};
13 | ||
// State of a single body as stored in the particle buffers.
struct PosVel
{
    float4 pos; // xyz = position, w = weight
    float4 vel; // velocity; only xyz is used
};
19 | ||
// Input: particle state this step reads from (slot t0).
StructuredBuffer<PosVel>   oldPar : register(t0);
// Output: particle state this step writes to (slot u0).
RWStructuredBuffer<PosVel> newPar : register(u0);
22 | ||
// Gravitational constant used to scale every pairwise interaction.
static float g_FG = 6.67300e-11f;
// Squared softening term added to each squared distance, so the inverse-distance
// factor stays finite even when two positions coincide.
static float g_softeningFactorSq = 0.0012500000*0.0012500000;
// Mass assumed for every attracting body (10^4 * 10^4 * 10^4 = 10^12).
static float g_fParticleMass = 10000.0f * 10000.0f * 10000.0f;
26 | ||
// Group-shared cache holding one tile of particle positions (one slot per thread in the group).
groupshared float4 sh_Positions[BLOCK_SIZE];
28 | ||
// Accumulates into `ai` the softened gravitational acceleration that body `bj`
// exerts on body `bi`.
//   ai - running acceleration of body i; updated in place
//   bi - body being accelerated; only xyz (position) is read
//   bj - attracting body; only xyz (position) is read
// Every attracting body is given the mass g_fParticleMass. The per-particle
// weight in bj.w is not used (the alternative `g_FG * bj.w * distInvCube * r`
// is disabled), so a zero-filled `bj` still attracts like a body at the origin.
void body_body_interaction(inout float3 ai, float4 bi, float4 bj)
{
    // Vector pointing from body i towards body j.
    const float3 offset = bj.xyz - bi.xyz;

    // Squared distance plus the softening term; strictly positive.
    const float softenedDistSq = dot(offset, offset) + g_softeningFactorSq;

    // 1 / d^3, evaluated as 1 / sqrt(d^2 * d^2 * d^2).
    const float invDistCubed = 1.0f / sqrt(softenedDistSq * softenedDistSq * softenedDistSq);

    // G * m / d^3; multiplying by the offset yields the acceleration contribution.
    // When both positions coincide the offset is zero, so the contribution is zero.
    const float scale = g_FG * g_fParticleMass * invDistCubed;
    ai += scale * offset;
}
41 | ||
// Compute-shader entry point: one thread advances one particle by one time step.
//   DTid       - dispatch-wide thread id; DTid.x is the index of this thread's particle
//   indexGroup - index of the thread inside its group; selects its slot in sh_Positions
//   Gid        - group id (not used)
// The particle set is processed in num_blocks tiles of BLOCK_SIZE positions. For
// each tile the group loads the positions into shared memory, and every thread
// then accumulates the pull of all cached bodies on its own particle.
// NOTE(review): `damping` from the constant buffer is not applied anywhere here.
[numthreads(BLOCK_SIZE,1,1)]
void CS(uint3 DTid : SV_DispatchThreadID, uint indexGroup : SV_GroupIndex, uint3 Gid : SV_GroupID)
{
    const uint particleIndex = DTid.x;

    PosVel self = oldPar[particleIndex];
    float3 accel = float3(0.0f, 0.0f, 0.0f);

    [loop]
    for (uint tile = 0; tile < num_blocks; ++tile)
    {
        // Each thread copies one position of the current tile into the shared cache.
        sh_Positions[indexGroup] = oldPar[tile * BLOCK_SIZE + indexGroup].pos;

        // Wait until the whole tile is loaded before anyone reads it.
        GroupMemoryBarrierWithGroupSync();

        // Accumulate the influence of every cached body, in slot order.
        [unroll]
        for (uint slot = 0; slot < BLOCK_SIZE; ++slot)
        {
            body_body_interaction(accel, self.pos, sh_Positions[slot]);
        }

        // Wait until every thread is done with this tile before it gets overwritten.
        GroupMemoryBarrierWithGroupSync();
    }

    // Out-of-range buffer reads in a compute shader return 0 (out-of-range writes are NOPs).
    // When num_particles is not a multiple of BLOCK_SIZE, the last tile therefore contained
    // "phantom" bodies at position (0,0,0) whose gravity was added above. Compute the pull
    // of one such phantom and subtract it once per phantom.
    // NOTE(review): assumes num_blocks * BLOCK_SIZE >= num_particles; otherwise this
    // unsigned subtraction wraps around - confirm the host guarantees it.
    const uint phantomCount = (num_blocks * BLOCK_SIZE) - num_particles;
    float3 phantomAccel = float3(0.0f, 0.0f, 0.0f);

    body_body_interaction(phantomAccel, self.pos, float4(0.0f, 0.0f, 0.0f, 0.0f));
    accel -= phantomCount * phantomAccel;

    // Integrate: update the velocity first, then move the particle with the updated velocity.
    self.vel.xyz += accel * deltaTime;
    self.pos.xyz += self.vel.xyz * deltaTime;

    // Only threads that map to a real particle store their result.
    if (particleIndex < num_particles)
    {
        newPar[particleIndex] = self;
    }
}