Advertisement
Guest User

Untitled

a guest
Nov 1st, 2014
211
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.02 KB | None | 0 0
  /*
   * matrix_mul - each work-item writes one float4 of the output buffer c.
   *
   * For global id gid, the kernel works on the group of four consecutive
   * float4 slots starting at base = 4 * (gid / 4); lane = gid % 4 picks the
   * float4 of b inside that group whose four components act as weights:
   *
   *     c[gid] = b[base + lane].x * a[base + 0]
   *            + b[base + lane].y * a[base + 1]
   *            + b[base + lane].z * a[base + 2]
   *            + b[base + lane].w * a[base + 3]
   *
   * Reading each group of four float4 values as the columns of a 4x4 matrix,
   * this is one column of the product a * b for that group.
   * NOTE(review): the column interpretation is an assumption about the
   * caller's storage layout - confirm against the host code.
   *
   * a, b: input buffers, read only.
   * c:    output buffer; only element gid is written by this work-item.
   *
   * There is no bounds check: a[base .. base + 3] and b[base + lane] are read
   * unconditionally, so the buffers have to cover the whole group of four
   * that the last launched work-item belongs to.
   */
  __kernel void matrix_mul(__global const float4* a, __global const float4* b, __global float4* c)
  {
      const uint gid  = get_global_id(0);
      const uint lane = gid % 4;        /* which float4 of the group this item produces */
      const uint base = (gid / 4) * 4;  /* index of the first float4 of the group */

      /* The four scalar weights used by every output component. */
      const float4 w = b[base + lane];

      /*
       * Accumulate starting from zero, one term at a time, so that each
       * component is summed in the order x, y, z, w of the weight vector.
       */
      float4 acc = (float4)(0);
      acc += w.x * a[base + 0];
      acc += w.y * a[base + 1];
      acc += w.z * a[base + 2];
      acc += w.w * a[base + 3];

      c[gid] = acc;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement