hedgefund

c_particle_SIMD

Jan 8th, 2025
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 5.36 KB | Source Code | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <time.h>
  4. #include <immintrin.h> // For AVX intrinsics
  5.  
  6. #define NUM_PARTICLES 100000
  7. #define SIMD_WIDTH 8 // AVX can process 8 floats at a time
  8. #define GRAVITY -9.81f
  9. #define DELTA_TIME 0.01f
  10.  
  11. // Define the ParticleData struct
  12. typedef struct {
  13.     float *positions_x;
  14.     float *positions_y;
  15.     float *positions_z;
  16.     float *velocities_x;
  17.     float *velocities_y;
  18.     float *velocities_z;
  19.     float *accelerations_x;
  20.     float *accelerations_y;
  21.     float *accelerations_z;
  22.     float *masses;
  23. } ParticleData;
  24.  
  25. // Allocate SoA data for particles
  26. void allocate_particle_data(ParticleData *data)
  27. {
  28.     data->positions_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  29.     data->positions_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  30.     data->positions_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  31.     data->velocities_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  32.     data->velocities_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  33.     data->velocities_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  34.     data->accelerations_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  35.     data->accelerations_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  36.     data->accelerations_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  37.     data->masses = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
  38. }
  39.  
  40. // Initialize particle data
  41. void initialize_particles(ParticleData *data)
  42. {
  43.     srand(time(NULL));
  44.     for (int i = 0; i < NUM_PARTICLES; ++i) {
  45.         data->positions_x[i] = (float)rand() / RAND_MAX * 10.0f;
  46.         data->positions_y[i] = (float)rand() / RAND_MAX * 10.0f;
  47.         data->positions_z[i] = (float)rand() / RAND_MAX * 10.0f;
  48.  
  49.         data->velocities_x[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
  50.         data->velocities_y[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
  51.         data->velocities_z[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
  52.  
  53.         data->accelerations_x[i] = 0.0f;
  54.         data->accelerations_y[i] = 0.0f;
  55.         data->accelerations_z[i] = 0.0f;
  56.  
  57.         data->masses[i] = (float)rand() / RAND_MAX * 2.0f + 1.0f;
  58.     }
  59. }
  60.  
  61. // Free the memory
  62. void free_particle_data(ParticleData *data)
  63. {
  64.     free(data->positions_x);
  65.     free(data->positions_y);
  66.     free(data->positions_z);
  67.     free(data->velocities_x);
  68.     free(data->velocities_y);
  69.     free(data->velocities_z);
  70.     free(data->accelerations_x);
  71.     free(data->accelerations_y);
  72.     free(data->accelerations_z);
  73.     free(data->masses);
  74. }
  75.  
  76. // Update particles using SIMD
  77. void update_particles_simd(ParticleData *data)
  78. {
  79.     __m256 gravity_vector = _mm256_set1_ps(GRAVITY);
  80.     __m256 delta_time_vector = _mm256_set1_ps(DELTA_TIME);
  81.  
  82.     for (int i = 0; i < NUM_PARTICLES; i += SIMD_WIDTH) {
  83.  
  84.         // Load positions, velocities, and accelerations into SIMD vectors
  85.         __m256 px = _mm256_load_ps(data->positions_x + i);
  86.         __m256 py = _mm256_load_ps(data->positions_y + i);
  87.         __m256 pz = _mm256_load_ps(data->positions_z + i);
  88.        
  89.         __m256 vx = _mm256_load_ps(data->velocities_x + i);
  90.         __m256 vy = _mm256_load_ps(data->velocities_y + i);
  91.         __m256 vz = _mm256_load_ps(data->velocities_z + i);
  92.        
  93.         __m256 ax = _mm256_load_ps(data->accelerations_x + i);
  94.         __m256 ay = _mm256_load_ps(data->accelerations_y + i);
  95.         __m256 az = _mm256_load_ps(data->accelerations_z + i);
  96.  
  97.         // Update accelerations (gravity)
  98.         az = _mm256_add_ps(az, gravity_vector);
  99.      
  100.         // Update velocities
  101.         vx = _mm256_add_ps(vx, _mm256_mul_ps(ax, delta_time_vector));
  102.         vy = _mm256_add_ps(vy, _mm256_mul_ps(ay, delta_time_vector));
  103.         vz = _mm256_add_ps(vz, _mm256_mul_ps(az, delta_time_vector));
  104.      
  105.         // Update positions
  106.         px = _mm256_add_ps(px, _mm256_mul_ps(vx, delta_time_vector));
  107.         py = _mm256_add_ps(py, _mm256_mul_ps(vy, delta_time_vector));
  108.         pz = _mm256_add_ps(pz, _mm256_mul_ps(vz, delta_time_vector));
  109.  
  110.         // Store back results
  111.         _mm256_store_ps(data->positions_x + i, px);
  112.         _mm256_store_ps(data->positions_y + i, py);
  113.         _mm256_store_ps(data->positions_z + i, pz);
  114.         _mm256_store_ps(data->velocities_x + i, vx);
  115.         _mm256_store_ps(data->velocities_y + i, vy);
  116.         _mm256_store_ps(data->velocities_z + i, vz);
  117.         _mm256_store_ps(data->accelerations_x + i, ax);
  118.         _mm256_store_ps(data->accelerations_y + i, ay);
  119.         _mm256_store_ps(data->accelerations_z + i, az);
  120.     }
  121. }
  122.  
  123. int main()
  124. {
  125.     ParticleData particle_data;
  126.     allocate_particle_data(&particle_data);
  127.     initialize_particles(&particle_data);
  128.  
  129.     const int num_steps = 100;
  130.  
  131.     printf("Starting SIMD test...\n");
  132.     clock_t start_simd = clock();
  133.     for (int i = 0; i < num_steps; ++i) {
  134.         update_particles_simd(&particle_data);
  135.     }
  136.     clock_t end_simd = clock();
  137.     double time_simd = (double)(end_simd - start_simd) / CLOCKS_PER_SEC * 1000;
  138.     printf("SIMD Time: %.2f ms\n", time_simd);
  139.  
  140.     printf("First position C: x=%.4f, y=%.4f, z=%.4f\n",
  141.            particle_data.positions_x[0],
  142.            particle_data.positions_y[0],
  143.            particle_data.positions_z[0]
  144.     );
  145.     free_particle_data(&particle_data);
  146.  
  147.     return 0;
  148. }
  149.  
Advertisement
Add Comment
Please, Sign In to add comment