Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
- #include <immintrin.h> // For AVX intrinsics
- #define NUM_PARTICLES 100000
- #define SIMD_WIDTH 8 // AVX can process 8 floats at a time
- #define GRAVITY -9.81f
- #define DELTA_TIME 0.01f
- // Define the ParticleData struct
- typedef struct {
- float *positions_x;
- float *positions_y;
- float *positions_z;
- float *velocities_x;
- float *velocities_y;
- float *velocities_z;
- float *accelerations_x;
- float *accelerations_y;
- float *accelerations_z;
- float *masses;
- } ParticleData;
- // Allocate SoA data for particles
- void allocate_particle_data(ParticleData *data)
- {
- data->positions_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->positions_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->positions_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->velocities_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->velocities_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->velocities_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->accelerations_x = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->accelerations_y = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->accelerations_z = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- data->masses = (float*) aligned_alloc(32, NUM_PARTICLES * sizeof(float));
- }
- // Initialize particle data
- void initialize_particles(ParticleData *data)
- {
- srand(time(NULL));
- for (int i = 0; i < NUM_PARTICLES; ++i) {
- data->positions_x[i] = (float)rand() / RAND_MAX * 10.0f;
- data->positions_y[i] = (float)rand() / RAND_MAX * 10.0f;
- data->positions_z[i] = (float)rand() / RAND_MAX * 10.0f;
- data->velocities_x[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
- data->velocities_y[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
- data->velocities_z[i] = (float)rand() / RAND_MAX * 2.0f - 1.0f;
- data->accelerations_x[i] = 0.0f;
- data->accelerations_y[i] = 0.0f;
- data->accelerations_z[i] = 0.0f;
- data->masses[i] = (float)rand() / RAND_MAX * 2.0f + 1.0f;
- }
- }
- // Free the memory
- void free_particle_data(ParticleData *data)
- {
- free(data->positions_x);
- free(data->positions_y);
- free(data->positions_z);
- free(data->velocities_x);
- free(data->velocities_y);
- free(data->velocities_z);
- free(data->accelerations_x);
- free(data->accelerations_y);
- free(data->accelerations_z);
- free(data->masses);
- }
- // Update particles using SIMD
- void update_particles_simd(ParticleData *data)
- {
- __m256 gravity_vector = _mm256_set1_ps(GRAVITY);
- __m256 delta_time_vector = _mm256_set1_ps(DELTA_TIME);
- for (int i = 0; i < NUM_PARTICLES; i += SIMD_WIDTH) {
- // Load positions, velocities, and accelerations into SIMD vectors
- __m256 px = _mm256_load_ps(data->positions_x + i);
- __m256 py = _mm256_load_ps(data->positions_y + i);
- __m256 pz = _mm256_load_ps(data->positions_z + i);
- __m256 vx = _mm256_load_ps(data->velocities_x + i);
- __m256 vy = _mm256_load_ps(data->velocities_y + i);
- __m256 vz = _mm256_load_ps(data->velocities_z + i);
- __m256 ax = _mm256_load_ps(data->accelerations_x + i);
- __m256 ay = _mm256_load_ps(data->accelerations_y + i);
- __m256 az = _mm256_load_ps(data->accelerations_z + i);
- // Update accelerations (gravity)
- az = _mm256_add_ps(az, gravity_vector);
- // Update velocities
- vx = _mm256_add_ps(vx, _mm256_mul_ps(ax, delta_time_vector));
- vy = _mm256_add_ps(vy, _mm256_mul_ps(ay, delta_time_vector));
- vz = _mm256_add_ps(vz, _mm256_mul_ps(az, delta_time_vector));
- // Update positions
- px = _mm256_add_ps(px, _mm256_mul_ps(vx, delta_time_vector));
- py = _mm256_add_ps(py, _mm256_mul_ps(vy, delta_time_vector));
- pz = _mm256_add_ps(pz, _mm256_mul_ps(vz, delta_time_vector));
- // Store back results
- _mm256_store_ps(data->positions_x + i, px);
- _mm256_store_ps(data->positions_y + i, py);
- _mm256_store_ps(data->positions_z + i, pz);
- _mm256_store_ps(data->velocities_x + i, vx);
- _mm256_store_ps(data->velocities_y + i, vy);
- _mm256_store_ps(data->velocities_z + i, vz);
- _mm256_store_ps(data->accelerations_x + i, ax);
- _mm256_store_ps(data->accelerations_y + i, ay);
- _mm256_store_ps(data->accelerations_z + i, az);
- }
- }
- int main()
- {
- ParticleData particle_data;
- allocate_particle_data(&particle_data);
- initialize_particles(&particle_data);
- const int num_steps = 100;
- printf("Starting SIMD test...\n");
- clock_t start_simd = clock();
- for (int i = 0; i < num_steps; ++i) {
- update_particles_simd(&particle_data);
- }
- clock_t end_simd = clock();
- double time_simd = (double)(end_simd - start_simd) / CLOCKS_PER_SEC * 1000;
- printf("SIMD Time: %.2f ms\n", time_simd);
- printf("First position C: x=%.4f, y=%.4f, z=%.4f\n",
- particle_data.positions_x[0],
- particle_data.positions_y[0],
- particle_data.positions_z[0]
- );
- free_particle_data(&particle_data);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment