Advertisement
Guest User

Untitled

a guest
Nov 30th, 2012
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //N-Body simulation
  2. //Algorithm based on: http://http.developer.nvidia.com/GPUGems3/gpugems3_ch31.html
  3.  
  4. #define BLOCK_SIZE 128
  5.  
  6. cbuffer data                        : register(b0) // Simulation constants, bound to constant-buffer slot b0.
  7. {
  8.     uint num_particles; // Particle count; CS only writes results for DTid.x < num_particles.
  9.     uint num_blocks; // Number of BLOCK_SIZE-wide tiles CS iterates over while accumulating acceleration.
  10.     float deltaTime; // Time step multiplied into both the velocity and the position update in CS.
  11.     float damping; // NOTE(review): not referenced by any code visible in this file - confirm whether it should scale velocity.
  12. };
  13.  
  14. struct PosVel // Element type of the oldPar / newPar particle buffers.
  15. {
  16.     float4 pos; //xyz = pos, w = weight (w is carried through unchanged; the force code visible here uses g_fParticleMass instead of w)
  17.     float4 vel; //only xyz (CS integrates vel.xyz and writes vel.w back unchanged)
  18. };
  19.  
  20. StructuredBuffer<PosVel>    oldPar  : register(t0); // Read-only input particle state (slot t0); CS reads positions and velocities from it.
  21. RWStructuredBuffer<PosVel>  newPar  : register(u0); // Output particle state (slot u0); CS writes the integrated result here.
  22.  
  23. static float g_FG = 6.67300e-11f; // Scale factor applied to every interaction; presumably the gravitational constant G - confirm units with the host code.
  24. static float g_softeningFactorSq =  0.0012500000*0.0012500000; // Added to the squared distance in body_body_interaction so the inverse term stays finite at zero separation.
  25. static float g_fParticleMass = 10000.0f * 10000.0f * 10000.0f; // Mass factor used for every body in body_body_interaction (the per-particle pos.w is not read).
  26.  
  27. groupshared float4 sh_Positions[BLOCK_SIZE]; // Group-shared tile of positions; each thread of the group fills one slot per tile in CS.
  28.  
  // Adds to `ai` the acceleration that body `bj` exerts on body `bi`.
  //
  //   ai : running acceleration of body bi (accumulated in place)
  //   bi : body being accelerated; only xyz (position) is read
  //   bj : attracting body; only xyz (position) is read
  //
  // The contribution is g_FG * g_fParticleMass * r / (|r|^2 + g_softeningFactorSq)^(3/2),
  // with r = bj.xyz - bi.xyz. Every body is treated as having the same mass
  // (g_fParticleMass); bj.w is intentionally not used here.
  void body_body_interaction(inout float3 ai, float4 bi, float4 bj)
  {
      // Separation vector pointing from bi towards bj.
      const float3 delta = bj.xyz - bi.xyz;

      // Softened squared distance: the added term keeps the value strictly
      // positive, so a body interacting with itself contributes zero
      // instead of dividing by zero.
      float softenedSq = dot(delta, delta);
      softenedSq += g_softeningFactorSq;

      // 1 / d^3, computed as 1 / sqrt(d^6).
      const float invDistCubed = 1.0f / sqrt(softenedSq * softenedSq * softenedSq);

      // Per-body mass variant would be: g_FG * bj.w * invDistCubed
      const float scale = g_FG * g_fParticleMass * invDistCubed;
      ai += scale * delta;
  }
  41.  
  // Compute-shader entry point: each thread advances one particle by one time step.
  //
  //   DTid.x     : index of the particle this thread owns in oldPar / newPar
  //   indexGroup : flat thread index inside the group; selects the shared-memory
  //                slot this thread fills for every tile
  //   Gid        : group id (unused; kept so the entry-point signature is unchanged)
  //
  // The particle set is processed in num_blocks tiles of BLOCK_SIZE positions.
  // For every tile the group cooperatively copies the positions into
  // sh_Positions, synchronises, and each thread then accumulates the
  // acceleration contributed by the bodies of that tile.
  //
  // When num_particles is not a multiple of BLOCK_SIZE, the last tile contains
  // padding slots whose out-of-range reads return zero. Those slots are skipped
  // during accumulation. The previous approach added them as bodies at (0,0,0)
  // and subtracted their contribution afterwards, which does not cancel exactly
  // in floating point and underflowed the unsigned padding count whenever
  // num_blocks * BLOCK_SIZE < num_particles.
  //
  // NOTE(review): the `damping` constant is not applied here - confirm intent.
  [numthreads(BLOCK_SIZE,1,1)]
  void CS(uint3 DTid : SV_DispatchThreadID, uint indexGroup : SV_GroupIndex, uint3 Gid : SV_GroupID)
  {
      float3 accel = float3(0.0f, 0.0f, 0.0f);

      // Threads past num_particles still run: they must take part in the tile
      // loads and barriers below, but their result is never written.
      PosVel myParticle = oldPar[DTid.x];

      [loop]
      for(uint block = 0; block < num_blocks; ++block)
      {
          const uint tileStart = block * BLOCK_SIZE;

          // Fetch this tile's positions into the shared cache.
          sh_Positions[indexGroup] = oldPar[tileStart + indexGroup].pos;
          GroupMemoryBarrierWithGroupSync();

          // Number of real particles in this tile (0 if the tile lies entirely
          // past the end). Depends only on constants and the loop counter, so
          // the branch below is uniform across the whole group.
          uint validInTile = 0;
          if(tileStart < num_particles)
          {
              validInTile = min((uint)BLOCK_SIZE, num_particles - tileStart);
          }

          if(validInTile == BLOCK_SIZE)
          {
              // Full tile: fixed trip count, fully unrolled.
              [unroll]
              for(uint i = 0; i < BLOCK_SIZE; ++i)
              {
                  body_body_interaction(accel, myParticle.pos, sh_Positions[i]);
              }
          }
          else
          {
              // Partial (or empty) tile: visit only the real particles.
              [loop]
              for(uint j = 0; j < validInTile; ++j)
              {
                  body_body_interaction(accel, myParticle.pos, sh_Positions[j]);
              }
          }

          // Make sure every thread is done reading before the next tile
          // overwrites the shared cache.
          GroupMemoryBarrierWithGroupSync();
      }

      // Calculate new velocity first, then move with the updated velocity.
      myParticle.vel.xyz += accel * deltaTime;
      myParticle.pos.xyz += myParticle.vel.xyz * deltaTime;

      // Save to global memory (padding threads have nothing to store).
      if(DTid.x < num_particles)
      {
          newPar[DTid.x].pos = myParticle.pos;
          newPar[DTid.x].vel = myParticle.vel;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement