Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * matrix_mul - batched 4x4 float matrix product, one float4 of output per
 * work-item.
 *
 * Each matrix occupies four consecutive float4 elements. Work-item `gid`
 * belongs to matrix gid/4 and produces slot gid%4 of that matrix in `c`.
 * The output float4 is the linear combination of the four float4s of the
 * matching matrix in `a`, weighted by the x, y, z, w components of b[gid]:
 *
 *   c[gid] = b[gid].x * a[base + 0] + b[gid].y * a[base + 1]
 *          + b[gid].z * a[base + 2] + b[gid].w * a[base + 3]
 *
 * Taking each float4 as one matrix column, this is C = A * B per matrix
 * (column-major storage is assumed here -- confirm against the host code).
 *
 * a: left-hand matrices (read only)
 * b: right-hand matrices (read only)
 * c: destination matrices (written at index gid only)
 *
 * The kernel performs no range check on the global id, so the launch size
 * must not exceed the number of float4 elements in the buffers.
 */
__kernel void matrix_mul(__global const float4* a, __global const float4* b, __global float4* c)
{
    const uint gid = get_global_id(0);

    /* Index of the first float4 of the matrix this work-item belongs to. */
    const uint base = (gid / 4) * 4;

    /* base + gid % 4 == gid, so the weighting float4 sits at the global id. */
    const float4 weights = b[gid];

    /* Accumulate term by term starting from zero; scalar * float4 scales
     * every component, giving the same per-component sum order as a fully
     * spelled-out scalar version. */
    float4 acc = (float4)(0);
    acc += weights.x * a[base + 0];
    acc += weights.y * a[base + 1];
    acc += weights.z * a[base + 2];
    acc += weights.w * a[base + 3];

    c[gid] = acc;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement