Advertisement
Guest User

Untitled

a guest
May 3rd, 2016
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.78 KB | None | 0 0
  /*
   * fun_kernal -- element-wise, in-place update of X:
   *
   *     X[k] = (float)A[k] * X[k] + (B[k] - X[k]) / C[k]
   *
   * for every k in [0, steps * gridDim.x * blockDim.x).
   *
   * Parameters
   *   A, B, C  input arrays read at every index k in the range above.
   *   X        input/output array, read and overwritten at the same indices.
   *   steps    number of elements handled per launched thread; a value
   *            <= 0 makes the kernel a no-op.
   *
   * Launch layout: only the .x components of blockIdx / blockDim / gridDim
   * are used, so a 1D grid of 1D blocks is expected. All four arrays must
   * hold at least steps * gridDim.x * blockDim.x elements; the kernel has no
   * separate length argument. No shared memory is required.
   *
   * Notes
   *   - The previous version staged the data in fixed-size __shared__ arrays
   *     but indexed them with grid-wide indices. Shared memory is private to
   *     each block, so with more than one block a thread read slots its own
   *     block had never written, and any index past the array size overran
   *     the arrays. Every element is touched exactly once, so staging gives
   *     no reuse; the update now works directly on global memory.
   *   - C[k] == 0 is not special-cased: the float division then yields
   *     inf/NaN, exactly as the arithmetic expression always did.
   */
  __global__ void fun_kernal(int *A, float *B, float *X, char *C, int steps){
      // A non-positive step count means there is nothing to process; bail
      // out before the count is widened to an unsigned type.
      if (steps <= 0) {
          return;
      }

      // Widen before multiplying so large launches cannot overflow int.
      const size_t allthreads = (size_t)gridDim.x * blockDim.x;
      const size_t total = (size_t)steps * allthreads;
      const size_t first = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

      // Grid-stride loop: neighbouring threads touch neighbouring elements on
      // every iteration. Elements are independent, so which thread updates
      // which k does not affect the result.
      for (size_t k = first; k < total; k += allthreads) {
          const float x = X[k];
          X[k] = (float)A[k] * x + (B[k] - x) / C[k];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement