Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <sys/time.h>
/*
 * Four packed single-precision floats (16 bytes).  Declared with the
 * GCC/Clang `vector_size` attribute so ordinary arithmetic operators work
 * lane-wise on it.
 */
typedef float v4sf __attribute__ ((vector_size(16)));

#define SIZE 32        /* matrix is SIZE x SIZE; must be a multiple of 4 */
#define ITER 1000000   /* number of timed mv_mult() calls */

/*
 * Multiply the SIZE x SIZE matrix `mat` by the column vector `vec`.
 *
 * Each row's dot product is accumulated four lanes at a time; the four lane
 * sums are then added left to right to give the row result.  Neither input
 * needs any particular alignment.
 *
 * Returns a pointer to a function-static array of SIZE floats holding the
 * product.  That buffer is overwritten by every call, so the function is not
 * reentrant: copy the result out before calling again if it must be kept.
 */
float *mv_mult(float mat[SIZE][SIZE], float vec[SIZE]) {
    static float ret[SIZE];
    int i, j;

    for (i = 0; i < SIZE; i++) {
        /* Start from an explicit zero.  Xor-ing a not-yet-assigned vector
         * with itself would read an indeterminate value. */
        v4sf acc = { 0.0f, 0.0f, 0.0f, 0.0f };

        for (j = 0; j < SIZE; j += 4) {
            v4sf m, v, prod;

            /* Byte-wise copies are valid for any source alignment; the
             * builtin form needs no additional header. */
            __builtin_memcpy(&m, &mat[i][j], sizeof m);
            __builtin_memcpy(&v, &vec[j], sizeof v);

            /* Multiply and add as two separate statements so the rounding
             * matches a distinct multiply followed by a distinct add. */
            prod = m * v;
            acc = acc + prod;
        }

        /* Horizontal sum of the four lanes, left to right. */
        ret[i] = acc[0] + acc[1] + acc[2] + acc[3];
    }
    return ret;
}
- int main(int argc, char **argv) {
- float mat[SIZE][SIZE];
- float vec[SIZE];
- int i, j;
- struct timeval start, end;
- for (i = 0; i < SIZE; i++) {
- for (j = 0; j < SIZE; j++)
- mat[i][j] = (float)i / (float)(j + 1);
- vec[i] = (float)i;
- }
- gettimeofday(&start, NULL);
- for (i = 0; i < ITER; i++)
- mv_mult(mat, vec);
- gettimeofday(&end, NULL);
- i = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
- printf("%d iterations, %d usec\n", ITER, i);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement