Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "mv-mult.h"
- #include <xmmintrin.h>
- // Matrix-Vector multiplication
- // mat is a SIZE by SIZE matrix that is arranged in row-column format.
- // That is, you first select a particular row, and then a particular column.
- // Each row is laid out as a one-dimensional array, so if you wanted
- // to select a particular row, you would use mat[row]. You can
- // also select smaller intervals by using &mat[row][col].
- // The vector is also laid out as a one-dimensional array, similar to a row.
- // M-V multiplication proceeds by taking the dot product of a matrix row
- // with the vector, and doing this for each row in the matrix
- // vectorize the below code using SIMD intrinsics
- float *
- mv_mult_vector(float mat[SIZE][SIZE], float vec[SIZE]) {
- static float ret[SIZE];
- // for (int i = 0; i < SIZE; i ++) {
- // ret[i] = 0;
- // for (int j = 0; j < SIZE; j ++) {
- // ret[i] += mat[i][j] * vec[j];
- // }
- // }
- float inner_product = 0.0, temp[4];
- __m128 acc, mat_128, vec_128; // 4x32-bit float registers
- acc = _mm_set1_ps(0.0); // set all four words in acc to 0.0
- int j = 0;
- for (; j < SIZE; j++) {
- int i = 0;
- acc = _mm_set1_ps(0.0);
- for (; i < (SIZE - 3); i += 4) {
- mat_128 = _mm_loadu_ps(&mat[j][i]); // load groups of four floats
- vec_128 = _mm_loadu_ps(&vec[j]);
- acc = _mm_add_ps(acc, _mm_mul_ps(mat_128, vec_128));
- }
- _mm_storeu_ps(temp, acc); // add the accumulated values
- ret[j] = temp[0] + temp[1] + temp[2] + temp[3];
- for (; i < SIZE; i++) { // add up the remaining floats
- ret[j] += mat[j][i] * vec[j];
- }
- }
- return ret;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement