Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Vectorization benchmark test program
- // Author: Sander Mertens
- // Make:
- // clang -O2 -Wall -pedantic -ffast-math vectorize_test.c -o vectorize_test
- #include <sys/time.h>
- #include <stdio.h>
- #include <stdlib.h>
/* 2D position component: a pair of float coordinates. */
struct Position {
    float x;
    float y;
};
typedef struct Position Position;

/* Speed component: a single scalar. */
typedef float Speed;

/* Full entity record: Position and Speed plus extra fields, so the struct
 * resembles what an actual application would store per entity. */
struct Entity {
    Position p;
    float size;
    float angle;
    Speed s;
    int mass;
    float foobar;
    Position v;
};
typedef struct Entity Entity;
/* Return the current wall-clock time in microseconds, as reported by
 * gettimeofday(). Returns 0 if gettimeofday() reports an error.
 *
 * The seconds field is widened to unsigned long long before scaling so the
 * multiplication cannot overflow on platforms where time_t is narrower than
 * 64 bits. */
unsigned long long ustime(void) {
    struct timeval tm;

    if (gettimeofday(&tm, NULL) != 0) {
        return 0;
    }

    return (unsigned long long)tm.tv_sec * 1000000ULL
         + (unsigned long long)tm.tv_usec;
}
/* Return the number of seconds elapsed since `start`, where `start` is a
 * timestamp in microseconds previously obtained from ustime(). */
double measure(unsigned long long start) {
    const unsigned long long elapsed_us = ustime() - start;

    return (double)elapsed_us / 1e6;
}
- /** Run benchmarks.
- * For each scenario, run a cold and a warm test, which measure the difference
- * between whether data still has to be loaded from RAM vs whether the data is
- * already in the cache.
- */
- void benchmark(int count) {
- unsigned long long start;
- printf("\n-- Preparing data for %d entities\n", count);
- /* Each attribute its own array */
- start = ustime();
- float *x = malloc(count * sizeof(float));
- float *y = malloc(count * sizeof(float));
- float *s = malloc(count * sizeof(float));
- float *x_novec = malloc(count * sizeof(float));
- float *y_novec = malloc(count * sizeof(float));
- float *s_novec = malloc(count * sizeof(float));
- printf("SoA allocation time = %f\n", measure(start));
- /* Store Position and Speed in separate arrays */
- start = ustime();
- Position *positions = malloc(count * sizeof(Position));
- Speed *speeds = malloc(count * sizeof(Speed));
- Position *positions_novec = malloc(count * sizeof(Position));
- Speed *speeds_novec = malloc(count * sizeof(Speed));
- printf("SoA component allocation time = %f\n", measure(start));
- /* Store entity structs in array */
- start = ustime();
- Entity *entities = malloc(count * sizeof(Entity));
- Entity *entities_novec = malloc(count * sizeof(Entity));
- printf("AoS allocation time = %f\n", measure(start));
- /* Store entities in separate blocks on the heap */
- Entity **entity_ptrs = malloc(count * sizeof(Entity*));
- void **garbage_ptrs = malloc(count * sizeof(void*));
- start = ustime();
- for (int i = 0; i < count; i ++) {
- entity_ptrs[i] = malloc(sizeof(Entity));
- /* Add garbage inbetween struct allocations. This better simulates
- * actual OOP-style applications, since objects are not typically all
- * allocated at the same moment.
- * Without this line, allocated structs most likely end up being in
- * consecutive memory, except for the tests with large numbers of
- * entities.*/
- garbage_ptrs[i] = malloc(64);
- }
- printf("Heap allocation time ~ %f\n", measure(start));
- printf("-- Start benchmarks\n");
- /* -- SoA (attributes) -- */
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- x[i] += s[i];
- y[i] += s[i];
- }
- printf(" SoA, cold: %f (V)\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- x[i] += s[i];
- y[i] += s[i];
- }
- printf(" SoA, warm: %f (V)\n", measure(start));
- /* No vectorization */
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- x_novec[i] += s_novec[i];
- y_novec[i] += s_novec[i];
- }
- printf(" SoA, cold: %f\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- x_novec[i] += s_novec[i];
- y_novec[i] += s_novec[i];
- }
- printf(" SoA, warm: %f\n", measure(start));
- /* -- SoA (components) -- */
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- positions[i].x += speeds[i];
- positions[i].y += speeds[i];
- }
- printf(" SoA (components), cold: %f (V)\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- positions[i].x += speeds[i];
- positions[i].y += speeds[i];
- }
- printf(" SoA (components), warm: %f (V)\n", measure(start));
- /* No vectorization */
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- positions_novec[i].x += speeds_novec[i];
- positions_novec[i].y += speeds_novec[i];
- }
- printf(" SoA (components), cold: %f\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- positions_novec[i].x += speeds_novec[i];
- positions_novec[i].y += speeds_novec[i];
- }
- printf(" SoA (components), warm: %f\n", measure(start));
- /* -- AoS -- */
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- entities[i].p.x += entities[i].s;
- entities[i].p.y += entities[i].s;
- }
- printf(" AoS, cold: %f (V)\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(enable)
- for(int i = 0; i < count; i ++) {
- entities[i].p.x += entities[i].s;
- entities[i].p.y += entities[i].s;
- }
- printf(" AoS, warm: %f (V)\n", measure(start));
- /* No vectorization */
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- entities_novec[i].p.x += entities_novec[i].s;
- entities_novec[i].p.y += entities_novec[i].s;
- }
- printf(" AoS, cold: %f\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- entities_novec[i].p.x += entities_novec[i].s;
- entities_novec[i].p.y += entities_novec[i].s;
- }
- printf(" AoS, warm: %f\n", measure(start));
- /* -- Heap blocks -- */
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- Entity *e = entity_ptrs[i];
- e->p.x += e->s;
- e->p.y += e->s;
- }
- printf(" Heap blocks, cold: %f\n", measure(start));
- start = ustime();
- #pragma clang loop vectorize(disable)
- for(int i = 0; i < count; i ++) {
- Entity *e = entity_ptrs[i];
- e->p.x += e->s;
- e->p.y += e->s;
- }
- printf(" Heap blocks, warm: %f\n", measure(start));
- /* Cleanup */
- printf("-- Cleaning up data\n");
- free(x);
- free(y);
- free(s);
- free(x_novec);
- free(y_novec);
- free(s_novec);
- free(positions);
- free(speeds);
- free(entities);
- free(positions_novec);
- free(speeds_novec);
- free(entities_novec);
- for (int i = 0; i < count; i ++) {
- free(entity_ptrs[i]);
- free(garbage_ptrs[i]);
- }
- free(entity_ptrs);
- free(garbage_ptrs);
- printf("-- Benchmarks done\n");
- }
/* Program entry point: runs the benchmark suite once per entity count, from
 * 100 thousand up to 200 million entities. Command-line arguments are not
 * used. Always returns 0. */
int main(int argc, char *argv[]) {
    static const int entity_counts[] = {
        100 * 1000,
        1000 * 1000,
        10 * 1000 * 1000,
        50 * 1000 * 1000,
        100 * 1000 * 1000,
        200 * 1000 * 1000,
    };
    const int run_total = (int)(sizeof entity_counts / sizeof entity_counts[0]);

    (void)argc;
    (void)argv;

    for (int run = 0; run < run_total; run++) {
        benchmark(entity_counts[run]);
    }

    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment