// Diff view between Paste ID: SJhs8ntt and emij5Hmz
//N-Body simulation
//Algorithm based on: http://http.developer.nvidia.com/GPUGems3/gpugems3_ch31.html

// Threads per thread group; also the number of positions held in the
// groupshared tile (sh_Positions) and the stride used to index tiles in CS.
#define BLOCK_SIZE 128

// Simulation parameters supplied by the host, bound to constant-buffer slot b0.
cbuffer data						: register(b0)
{
	uint num_particles;	// particle count; threads with DTid.x >= num_particles write nothing
	uint num_blocks;	// number of BLOCK_SIZE-wide tiles the interaction loop in CS walks over
	float deltaTime;	// time step used to integrate velocity and position
	float damping;		// not referenced by any code in this file — NOTE(review): confirm whether the host still needs it
};

// One particle record, as laid out in the oldPar / newPar buffers.
struct PosVel
{
	float4 pos; //xyz = position, w = weight (the active force computation ignores w and uses a uniform mass)
	float4 vel; //only xyz is used
};

// Particle state read by this pass (shader-resource slot t0).
StructuredBuffer<PosVel>	oldPar	: register(t0);
// Particle state written by this pass (unordered-access slot u0).
RWStructuredBuffer<PosVel> 	newPar	: register(u0);

// Gravitational constant that scales every pairwise contribution.
static const float g_FG = 6.67300e-11f;
// Squared softening length added to every squared distance, so a pair of
// coincident positions (including a particle paired with itself) never
// produces a division by zero.
static const float g_softeningFactorSq =  0.0012500000*0.0012500000;
// Uniform mass (1e12) assigned to every attracting particle.
static const float g_fParticleMass = 10000.0f * 10000.0f * 10000.0f;

// Per-thread-group tile of particle positions; each thread fills one slot per
// tile and then every thread in the group reads all BLOCK_SIZE slots.
groupshared float4 sh_Positions[BLOCK_SIZE];

// Adds to ai the acceleration that the particle at bj exerts on the particle
// at bi.
//   ai - running acceleration of the attracted particle (accumulated in place)
//   bi - attracted particle; only xyz is read
//   bj - attracting particle; only xyz is read, its mass is taken to be
//        g_fParticleMass regardless of bj.w
void body_body_interaction(inout float3 ai, float4 bi, float4 bj)
{
	// Vector pointing from the attracted particle towards the attracting one.
	float3 r = bj.xyz - bi.xyz;

	// Softened squared distance: strictly positive even when bi and bj coincide,
	// in which case r is zero and the contribution below vanishes.
	float distSqr = dot(r, r) + g_softeningFactorSq;

	// 1 / d^3: one power of d normalises r, the other two give the
	// inverse-square falloff.
	float distInvCube = 1.0f / sqrt(distSqr * distSqr * distSqr);

	// Per-particle-mass variant, currently disabled:
	//ai += g_FG * bj.w * distInvCube * r;
	ai += g_FG * g_fParticleMass * distInvCube * r;
}

// Compute-shader entry point: one thread per particle.
// Each thread sums the gravitational acceleration exerted on its particle by
// all particles in oldPar, processed tile by tile through groupshared memory,
// then integrates velocity and position over deltaTime and stores the result
// in newPar.
//   DTid       - DTid.x is the index of the particle this thread owns
//   indexGroup - index of the thread inside its group; selects the tile slot it fills
//   Gid        - group id (unused)
[numthreads(BLOCK_SIZE,1,1)]
void CS(uint3 DTid : SV_DispatchThreadID, uint indexGroup : SV_GroupIndex, uint3 Gid : SV_GroupID)
{
	float3 accel = float3(0.0f, 0.0f, 0.0f);

	// Threads past the last particle read out of range here; they must still run
	// the whole loop below because every thread of the group takes part in the
	// tile load and in the barriers.
	PosVel myParticle = oldPar[DTid.x];

	[loop]
	for(uint block = 0; block < num_blocks; ++block)
	{
		//Fetch positions to shared cache
		sh_Positions[indexGroup] = oldPar[block * BLOCK_SIZE + indexGroup].pos;
		// Wait until the whole tile has been written before anyone reads it.
		GroupMemoryBarrierWithGroupSync();

		// Fully unrolled; slots are visited in ascending order.
		[unroll]
		for(uint i = 0; i < BLOCK_SIZE; ++i)
		{
			body_body_interaction(accel, myParticle.pos, sh_Positions[i]);
		}

		// Wait until every thread is done with this tile before it is overwritten.
		GroupMemoryBarrierWithGroupSync();
	}

	//CS returns 0 for memory access out of range (writes out of range are NOPs)
	//So the loop above also accumulated gravity from phantom particles at position (0,0,0)
	//whenever num_particles is not a multiple of BLOCK_SIZE.
	//We need to subtract their contribution now.
	uint paddedCount = num_blocks * BLOCK_SIZE;
	// Guard against unsigned underflow if the host passes fewer tiles than particles.
	uint particleDifference = (paddedCount > num_particles) ? (paddedCount - num_particles) : 0;
	float3 a = float3(0.0f, 0.0f, 0.0f);

	// Acceleration caused by a single phantom particle sitting at the origin.
	body_body_interaction(a, myParticle.pos, float4(0.0f, 0.0f, 0.0f, 0.0f));
	accel -= particleDifference * a;

	//Calculate new position and velocity
	// Velocity is updated first, so the position step uses the new velocity.
	myParticle.vel.xyz += accel * deltaTime;
	myParticle.pos.xyz += myParticle.vel.xyz * deltaTime;

	//Save to global memory
	if(DTid.x < num_particles)
	{
		newPar[DTid.x].pos = myParticle.pos;
		newPar[DTid.x].vel = myParticle.vel;
	}
}