Advertisement
Guest User

A Maz

a guest
Jan 21st, 2013
292
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.58 KB | None | 0 0
  #define TILE_WIDTH 16

  /*
   * matrixMultiplyShared - tiled matrix product C = A * B.
   *
   * A is numARows x numAColumns, B is numBRows x numBColumns and C is
   * numCRows x numCColumns; all three are indexed row-major.  Dimension 0
   * of the NDRange walks the columns of C, dimension 1 walks its rows.
   *
   * Launch requirements (implied by the indexing below):
   *   - local work size must be TILE_WIDTH x TILE_WIDTH, because local IDs
   *     index the tile arrays directly and group IDs are scaled by
   *     TILE_WIDTH;
   *   - global work size must cover numCColumns x numCRows, rounded up to
   *     whole tiles.  Work-items that fall outside C still take part in
   *     loading tiles and in the barriers, but never write to C.
   *
   * The kernel does not validate the shapes.  It iterates over numAColumns
   * as the shared dimension, so callers are presumably expected to pass
   * numBRows == numAColumns, numCRows == numARows and
   * numCColumns == numBColumns -- confirm against the host code.
   */
  __kernel void matrixMultiplyShared(__global const float *A,
                                     __global const float *B,
                                     __global float *C,
                                     int numARows,
                                     int numAColumns,
                                     int numBRows,
                                     int numBColumns,
                                     int numCRows,
                                     int numCColumns)
  {
      /* One tile of A and one tile of B, shared by the whole work-group. */
      __local float ds_A[TILE_WIDTH][TILE_WIDTH];
      __local float ds_B[TILE_WIDTH][TILE_WIDTH];

      const int bx = (int)get_group_id(0);
      const int by = (int)get_group_id(1);
      const int tx = (int)get_local_id(0);
      const int ty = (int)get_local_id(1);

      /* Element of C this work-item is responsible for. */
      const int row = by * TILE_WIDTH + ty;
      const int col = bx * TILE_WIDTH + tx;
      const bool inOutput = (row < numCRows) && (col < numCColumns);

      /* Number of tiles along the shared dimension (rounded up). */
      const int numTiles = (numAColumns - 1) / TILE_WIDTH + 1;

      /* 0.0f, not 0.0: an unsuffixed literal is a double constant. */
      float Cvalue = 0.0f;

      for (int m = 0; m < numTiles; ++m) {
          const int aCol = m * TILE_WIDTH + tx;
          const int bRow = m * TILE_WIDTH + ty;

          /*
           * Each work-item loads one element of each tile.  Positions that
           * fall outside the matrices are zero-filled so they contribute
           * nothing to the dot product below.
           */
          ds_A[ty][tx] = (row < numARows && aCol < numAColumns)
                             ? A[row * numAColumns + aCol]
                             : 0.0f;
          ds_B[ty][tx] = (bRow < numBRows && col < numBColumns)
                             ? B[bRow * numBColumns + col]
                             : 0.0f;

          /* Both tiles must be completely written before anyone reads them. */
          barrier(CLK_LOCAL_MEM_FENCE);

          /*
           * Only the arithmetic is guarded; the barriers stay outside the
           * condition so every work-item in the group reaches them.
           */
          if (inOutput) {
              for (int k = 0; k < TILE_WIDTH; ++k) {
                  Cvalue += ds_A[ty][k] * ds_B[k][tx];
              }
          }

          /* Do not overwrite the tiles while other work-items still read them. */
          barrier(CLK_LOCAL_MEM_FENCE);
      }

      if (inOutput) {
          C[row * numCColumns + col] = Cvalue;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement