Guest User

Synthetic vectorization benchmark (clang pragmas)

a guest
Feb 5th, 2019
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 7.55 KB | None | 0 0
  1. // Vectorization benchmark test program
  2. // Author: Sander Mertens
  3.  
  4. // Make:
  5. //     clang -O2 -Wall -pedantic -ffast-math vectorize_test.c -o vectorize_test
  6.  
  7. #include <sys/time.h>
  8. #include <stdio.h>
  9. #include <stdlib.h>
  10.  
  11. /* Position component */
  12. typedef struct Position {
  13.     float x;
  14.     float y;
  15. } Position;
  16.  
  17. /* Speed component */
  18. typedef float Speed;
  19.  
  20. /* Struct w Position, Speed + additional data to better mimic actual application */
  21. typedef struct Entity {
  22.     Position p;
  23.     float size;
  24.     float angle;
  25.     Speed s;
  26.     int mass;
  27.     float foobar;
  28.     Position v;
  29. } Entity;
  30.  
  31. unsigned long long ustime() {
  32.     struct timeval tm;
  33.     gettimeofday(&tm, NULL);
  34.     return tm.tv_sec * 1000000 + tm.tv_usec;
  35. }
  36.  
  37. double measure(unsigned long long start) {
  38.     return (double)(ustime() - start) / 1000000.00;
  39. }
  40.  
  41. /** Run benchmarks.
  42.  * For each scenario, run a cold and a warm test, which measure the difference
  43.  * between whether data still has to be loaded from RAM vs whether the data is
  44.  * already in the cache.
  45.  */
  46. void benchmark(int count) {
  47.     unsigned long long start;
  48.  
  49.     printf("\n-- Preparing data for %d entities\n", count);
  50.  
  51.     /* Each attribute its own array */
  52.     start = ustime();
  53.     float *x = malloc(count * sizeof(float));
  54.     float *y = malloc(count * sizeof(float));
  55.     float *s = malloc(count * sizeof(float));
  56.     float *x_novec = malloc(count * sizeof(float));
  57.     float *y_novec = malloc(count * sizeof(float));
  58.     float *s_novec = malloc(count * sizeof(float));
  59.     printf("SoA allocation time = %f\n", measure(start));
  60.  
  61.  
  62.     /* Store Position and Speed in separate arrays */
  63.     start = ustime();
  64.     Position *positions = malloc(count * sizeof(Position));
  65.     Speed *speeds = malloc(count * sizeof(Speed));
  66.     Position *positions_novec = malloc(count * sizeof(Position));
  67.     Speed *speeds_novec = malloc(count * sizeof(Speed));
  68.     printf("SoA component allocation time = %f\n", measure(start));
  69.  
  70.     /* Store entity structs in array */
  71.     start = ustime();
  72.     Entity *entities = malloc(count * sizeof(Entity));
  73.     Entity *entities_novec = malloc(count * sizeof(Entity));
  74.     printf("AoS allocation time = %f\n", measure(start));
  75.  
  76.     /* Store entities in separate blocks on the heap */
  77.     Entity **entity_ptrs = malloc(count * sizeof(Entity*));
  78.     void **garbage_ptrs = malloc(count * sizeof(void*));
  79.  
  80.     start = ustime();
  81.     for (int i = 0; i < count; i ++) {
  82.         entity_ptrs[i] = malloc(sizeof(Entity));
  83.  
  84.         /* Add garbage inbetween struct allocations. This better simulates
  85.          * actual OOP-style applications, since objects are not typically all
  86.          * allocated at the same moment.
  87.          * Without this line, allocated structs most likely end up being in
  88.          * consecutive memory, except for the tests with large numbers of
  89.          * entities.*/
  90.         garbage_ptrs[i] = malloc(64);
  91.     }
  92.     printf("Heap allocation time ~ %f\n", measure(start));
  93.  
  94.     printf("-- Start benchmarks\n");
  95.  
  96.  
  97.     /* -- SoA (attributes) -- */
  98.  
  99.     start = ustime();
  100. #pragma clang loop vectorize(enable)
  101.     for(int i = 0; i < count; i ++) {
  102.         x[i] += s[i];
  103.         y[i] += s[i];
  104.     }
  105.     printf("   SoA, cold:                    %f (V)\n", measure(start));
  106.  
  107.     start = ustime();
  108. #pragma clang loop vectorize(enable)
  109.     for(int i = 0; i < count; i ++) {
  110.         x[i] += s[i];
  111.         y[i] += s[i];
  112.     }
  113.     printf("   SoA, warm:                    %f (V)\n", measure(start));
  114.  
  115.     /* No vectorization */
  116.     start = ustime();
  117. #pragma clang loop vectorize(disable)
  118.     for(int i = 0; i < count; i ++) {
  119.         x_novec[i] += s_novec[i];
  120.         y_novec[i] += s_novec[i];
  121.     }
  122.     printf("   SoA, cold:                    %f\n", measure(start));
  123.  
  124.     start = ustime();
  125. #pragma clang loop vectorize(disable)
  126.     for(int i = 0; i < count; i ++) {
  127.         x_novec[i] += s_novec[i];
  128.         y_novec[i] += s_novec[i];
  129.     }
  130.     printf("   SoA, warm:                    %f\n", measure(start));
  131.  
  132.  
  133.  
  134.     /* -- SoA (components) -- */
  135.  
  136.     start = ustime();
  137. #pragma clang loop vectorize(enable)
  138.     for(int i = 0; i < count; i ++) {
  139.         positions[i].x += speeds[i];
  140.         positions[i].y += speeds[i];
  141.     }
  142.     printf("   SoA (components), cold:       %f (V)\n", measure(start));
  143.  
  144.     start = ustime();
  145. #pragma clang loop vectorize(enable)
  146.     for(int i = 0; i < count; i ++) {
  147.         positions[i].x += speeds[i];
  148.         positions[i].y += speeds[i];
  149.     }
  150.     printf("   SoA (components), warm:       %f (V)\n", measure(start));
  151.  
  152.     /* No vectorization */
  153.     start = ustime();
  154. #pragma clang loop vectorize(disable)
  155.     for(int i = 0; i < count; i ++) {
  156.         positions_novec[i].x += speeds_novec[i];
  157.         positions_novec[i].y += speeds_novec[i];
  158.     }
  159.     printf("   SoA (components), cold:       %f\n", measure(start));
  160.  
  161.     start = ustime();
  162. #pragma clang loop vectorize(disable)
  163.     for(int i = 0; i < count; i ++) {
  164.         positions_novec[i].x += speeds_novec[i];
  165.         positions_novec[i].y += speeds_novec[i];
  166.     }
  167.     printf("   SoA (components), warm:       %f\n", measure(start));
  168.  
  169.  
  170.  
  171.     /* -- AoS -- */
  172.  
  173.     start = ustime();
  174. #pragma clang loop vectorize(enable)
  175.     for(int i = 0; i < count; i ++) {
  176.         entities[i].p.x += entities[i].s;
  177.         entities[i].p.y += entities[i].s;
  178.     }
  179.     printf("   AoS, cold:                    %f (V)\n", measure(start));
  180.  
  181.     start = ustime();
  182. #pragma clang loop vectorize(enable)
  183.     for(int i = 0; i < count; i ++) {
  184.         entities[i].p.x += entities[i].s;
  185.         entities[i].p.y += entities[i].s;
  186.     }
  187.     printf("   AoS, warm:                    %f (V)\n", measure(start));
  188.  
  189.  
  190.     /* No vectorization */
  191.     start = ustime();
  192. #pragma clang loop vectorize(disable)
  193.     for(int i = 0; i < count; i ++) {
  194.         entities_novec[i].p.x += entities_novec[i].s;
  195.         entities_novec[i].p.y += entities_novec[i].s;
  196.     }
  197.     printf("   AoS, cold:                    %f\n", measure(start));
  198.  
  199.     start = ustime();
  200. #pragma clang loop vectorize(disable)
  201.     for(int i = 0; i < count; i ++) {
  202.         entities_novec[i].p.x += entities_novec[i].s;
  203.         entities_novec[i].p.y += entities_novec[i].s;
  204.     }
  205.     printf("   AoS, warm:                    %f\n", measure(start));
  206.  
  207.  
  208.  
  209.     /* -- Heap blocks -- */
  210.  
  211.     start = ustime();
  212. #pragma clang loop vectorize(disable)
  213.     for(int i = 0; i < count; i ++) {
  214.         Entity *e = entity_ptrs[i];
  215.         e->p.x += e->s;
  216.         e->p.y += e->s;
  217.     }
  218.     printf("   Heap blocks, cold:            %f\n", measure(start));
  219.  
  220.     start = ustime();
  221. #pragma clang loop vectorize(disable)
  222.     for(int i = 0; i < count; i ++) {
  223.         Entity *e = entity_ptrs[i];
  224.         e->p.x += e->s;
  225.         e->p.y += e->s;
  226.     }
  227.     printf("   Heap blocks, warm:            %f\n", measure(start));
  228.  
  229.  
  230.     /* Cleanup */
  231.     printf("-- Cleaning up data\n");
  232.     free(x);
  233.     free(y);
  234.     free(s);
  235.     free(x_novec);
  236.     free(y_novec);
  237.     free(s_novec);
  238.  
  239.     free(positions);
  240.     free(speeds);
  241.     free(entities);
  242.  
  243.     free(positions_novec);
  244.     free(speeds_novec);
  245.     free(entities_novec);
  246.  
  247.     for (int i = 0; i < count; i ++) {
  248.         free(entity_ptrs[i]);
  249.         free(garbage_ptrs[i]);
  250.     }
  251.     free(entity_ptrs);
  252.     free(garbage_ptrs);
  253.  
  254.     printf("-- Benchmarks done\n");
  255. }
  256.  
  257. int main(int argc, char *argv[]) {
  258.     benchmark(100 * 1000);
  259.     benchmark(1000 * 1000);
  260.     benchmark(10 * 1000 * 1000);
  261.     benchmark(50 * 1000 * 1000);
  262.     benchmark(100 * 1000 * 1000);
  263.     benchmark(200 * 1000 * 1000);
  264.     return 0;
  265. }
Advertisement
Add Comment
Please, Sign In to add comment