Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * In-place, element-wise update of X over the first
 * steps * gridDim.x * blockDim.x elements:
 *
 *     X[k] = (float)A[k] * X[k] + (B[k] - X[k]) / C[k]
 *
 * Parameters:
 *   A, B, C - input arrays, only read by this kernel.
 *   X       - read and overwritten in place.
 *   steps   - number of elements handled per launched thread; values <= 0
 *             make the kernel a no-op.
 *
 * Launch / preconditions:
 *   - Written for a 1D grid of 1D blocks (only the .x components are used);
 *     any grid and block size works.
 *   - Every array must hold at least steps * gridDim.x * blockDim.x
 *     elements; the kernel has no separate length argument to check against.
 *   - No shared memory is used. Each element is read once and written once
 *     by a single thread, so staging it per block gives no reuse. The earlier
 *     staging also indexed the block-local buffers with the global element
 *     index, which overruns them and reads slots filled by other blocks.
 *   - NOTE(review): C[k] == 0 results in a floating-point division by zero
 *     (inf/NaN in X[k]); confirm callers never pass zero entries.
 */
__global__ void fun_kernal(int *A, float *B, float *X, char *C, int steps){
    if (steps <= 0)
        return;

    // 64-bit index math so large launches cannot wrap a 32-bit integer.
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    const size_t total = (size_t)steps * stride;

    // Consecutive threads touch consecutive elements on every iteration,
    // which keeps the global loads and stores contiguous across a warp.
    for (size_t k = (size_t)blockIdx.x * blockDim.x + threadIdx.x; k < total; k += stride) {
        const float x = X[k];
        X[k] = (float)A[k] * x + (B[k] - x) / C[k];
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement