View difference between Paste ID: <a href="/SJhs8ntt">SJhs8ntt</a> and <a href="/emij5Hmz">emij5Hmz</a>

//N-Body simulation
1		//N-Body simulation
2		//Algorithm based on: http://http.developer.nvidia.com/GPUGems3/gpugems3_ch31.html
3
4		#define BLOCK_SIZE 128 // threads per thread group (see numthreads on CS) and number of positions held in sh_Positions
5
6		cbuffer data : register(b0) // simulation parameters, bound to constant-buffer slot b0
7		{
8		uint num_particles; // particle count; CS only writes results for indices below this value
9		uint num_blocks; // number of BLOCK_SIZE-sized tiles CS iterates over
10		float deltaTime; // time step used by CS when integrating velocity and position
11		float damping; // not referenced by any code visible in this file
12		};
13
14		struct PosVel // one particle record; element type of both oldPar and newPar
15		{
16		float4 pos; //xyz = position, w = weight (the visible force code ignores w and uses g_fParticleMass instead)
17		float4 vel; //only xyz is used; w is copied through unchanged by CS
18		};
19
20		StructuredBuffer<PosVel> oldPar : register(t0); // read-only input: particle state CS reads from
21		RWStructuredBuffer<PosVel> newPar : register(u0); // output: updated particle state CS writes to
22
23		static float g_FG = 6.67300e-11f; // gravitational constant G (presumably SI units - confirm)
24		static float g_softeningFactorSq = 0.0012500000*0.0012500000; // added to the squared distance in body_body_interaction so it is never zero
25		static float g_fParticleMass = 10000.0f * 10000.0f * 10000.0f; // = 1e12; the mass body_body_interaction assumes for every attracting body
26
27		groupshared float4 sh_Positions[BLOCK_SIZE]; // thread-group shared cache holding one tile of particle positions
28
29		void body_body_interaction(inout float3 ai, float4 bi, float4 bj) // adds to ai the softened gravitational acceleration that body bj exerts on body bi (only .xyz of bi/bj is read)
30		{
31		float3 r = bj.xyz - bi.xyz; // vector pointing from bi towards bj
32
33		float distSqr = dot(r, r);
34		distSqr += g_softeningFactorSq; // softening keeps the denominator non-zero, so bi == bj contributes exactly zero (r == 0)
35
36		float distInvCube = 1.0f / sqrt(distSqr * distSqr * distSqr); // 1 / (|r|^2 + eps^2)^(3/2)
37
38	-	//ai += g_FG * /g_fParticleMass/bj.w * distInvCube * r;
38	+	//ai += g_FG * bj.w * distInvCube * r;
39		ai += g_FG * g_fParticleMass * distInvCube * r; // a = G * m * r / (|r|^2 + eps^2)^(3/2); every body uses the constant mass g_fParticleMass, bj.w is ignored
40		}
41
42		[numthreads(BLOCK_SIZE,1,1)] // BLOCK_SIZE threads per group along x
43		void CS(uint3 DTid : SV_DispatchThreadID, uint indexGroup : SV_GroupIndex, uint3 Gid : SV_GroupID) // advances particle DTid.x by one time step: sums the acceleration from all tiles, integrates, writes to newPar (Gid is unused)
44		{
45		float3 accel = float3(0.0f, 0.0f, 0.0f); // running sum of the acceleration acting on this thread's particle
46
47		PosVel myParticle = oldPar[DTid.x]; // this thread's particle; for DTid.x >= num_particles the result is never stored (see the guard at the end)
48
49		[loop]
50		for(uint block=0; block< num_blocks; ++block) // walk over every tile of BLOCK_SIZE particles
51		{
52		//Stage one tile in shared memory: each thread of the group loads exactly one position
53		sh_Positions[indexGroup] = oldPar[block * BLOCK_SIZE + indexGroup].pos;
54		GroupMemoryBarrierWithGroupSync(); // wait until the whole tile is loaded before any thread reads it
55
56		[unroll]
57		for(uint i = 0; i<BLOCK_SIZE; i+=8) // manually unrolled by 8: accumulate the pull of every cached position
58		{
59		body_body_interaction(accel, myParticle.pos, sh_Positions[i]);
60		body_body_interaction(accel, myParticle.pos, sh_Positions[i+1]);
61		body_body_interaction(accel, myParticle.pos, sh_Positions[i+2]);
62		body_body_interaction(accel, myParticle.pos, sh_Positions[i+3]);
63		body_body_interaction(accel, myParticle.pos, sh_Positions[i+4]);
64		body_body_interaction(accel, myParticle.pos, sh_Positions[i+5]);
65		body_body_interaction(accel, myParticle.pos, sh_Positions[i+6]);
66		body_body_interaction(accel, myParticle.pos, sh_Positions[i+7]);
67		}
68
69		GroupMemoryBarrierWithGroupSync(); // wait until every thread is done with this tile before the next iteration overwrites it
70		}
71
72		//In a compute shader, out-of-range buffer reads return 0 (and out-of-range writes are no-ops).
73		//When num_particles is not a multiple of BLOCK_SIZE, the padding slots of the last tile were therefore treated as bodies sitting at (0,0,0).
74		//Their contribution is subtracted again here. NOTE(review): this assumes oldPar holds exactly num_particles elements - confirm.
75		uint particleDifference = (num_blocks * BLOCK_SIZE) - num_particles; // number of padding slots; NOTE(review): wraps around if num_blocks * BLOCK_SIZE < num_particles - confirm the host prevents that
76		float3 a = float3(0.0f, 0.0f, 0.0f);
77
78		body_body_interaction(a, myParticle.pos, float4(0.0f, 0.0f, 0.0f, 0.0f)); // acceleration caused by a single body at the origin
79		accel -= particleDifference * a; // remove that contribution once per padding slot
80
81		//Integrate: update the velocity first, then move the position with the already-updated velocity (damping is not applied here)
82		myParticle.vel.xyz += accel * deltaTime;
83		myParticle.pos.xyz += myParticle.vel.xyz * deltaTime;
84
85		//Store the result in the output buffer; threads beyond the last particle write nothing
86		if(DTid.x < num_particles)
87		{
88		newPar[DTid.x].pos = myParticle.pos;
89		newPar[DTid.x].vel = myParticle.vel;
90		}
91		}