Advertisement
Guest User

Untitled

a guest
Oct 19th, 2012
207
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.07 KB | None | 0 0
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <math.h>
  4.  
  5. // Thread block size
  6. #define BLOCK_SIZE 16
  7. #define TILE_SIZE  16
  8.  
  9. #define WA 512   // Matrix A width
  10. #define HA 512   // Matrix A height
  11.  
  12. __global__ void
  13. poisson( float* A, int wA)
  14. {
  15.  
  16.    // 1. 2D Thread ID
  17.    int iid=0;
  18.    //float value = 0.000000001;
  19.    //float delta_u=-1.0;
  20.    do
  21.    {
  22.    int tx = blockIdx.x * TILE_SIZE + threadIdx.x;
  23.    int ty = blockIdx.y * TILE_SIZE + threadIdx.y;
  24.    //int tx = threadIdx.x;
  25.    //int ty = threadIdx.y;
  26.  
  27.     if (ty>1 && ty<WA-1 && tx>1 && tx<HA-1)
  28.         {
  29.         //float tmp_delta_u=A[ty * wA + tx];
  30.  
  31.         float elementA = A[(ty*wA) + (tx+1)];
  32.         elementA += A[(ty*wA) + (tx-1)];
  33.         elementA += A[((ty+1)*wA) + tx];
  34.         elementA += A[((ty-1)*wA) + tx];
  35.         elementA *= 0.25;
  36.  
  37.         //tmp_delta_u-=elementA ;
  38.         A[ty * wA + tx] = elementA ;
  39.  
  40.         //if (fabs(tmp_delta_u)>delta_u)
  41.         //    delta_u=fabs(tmp_delta_u);
  42.         };
  43.     }
  44.     //while (value<delta_u);
  45.     while (iid++<20000);
  46.  
  47. }
  48.  
  49. void randomInit(float* data, int size)
  50. {
  51.     for (int i = 0; i < size; ++i)
  52.         {
  53.         data[i] = i%13;
  54.         };
  55. }
  56.  
  57. /////////////////////////////////////////////////////////
  58. // Program main
  59. /////////////////////////////////////////////////////////
  60.  
  61. int
  62. main(int argc, char** argv)
  63. {
  64.  
  65.     srand(10);
  66.  
  67.     unsigned int size_A = WA * HA;
  68.     unsigned int mem_size_A = sizeof(float) * size_A;
  69.     float* h_A = (float*) malloc(mem_size_A);
  70.  
  71.     randomInit(h_A, size_A);
  72.  
  73.     float* d_A;
  74.     cudaMalloc((void**) &d_A, mem_size_A);
  75.  
  76.     cudaMemcpy(d_A, h_A, mem_size_A,
  77.                cudaMemcpyHostToDevice);
  78.  
  79.  
  80.     dim3 dimBlock(BLOCK_SIZE,BLOCK_SIZE);
  81.     dim3 dimGrid( WA/BLOCK_SIZE,HA/BLOCK_SIZE);
  82.  
  83.     poisson<<<dimGrid, dimBlock>>>(d_A,WA);
  84.     cudaMemcpy(h_A, d_A, mem_size_A,
  85.                cudaMemcpyDeviceToHost);
  86.  
  87.     printf("\n\nMatrix C (Results)\n");
  88.     for(int i = 0; i < size_A; i++)
  89.     {
  90.        printf("%.1f ", h_A[i]);
  91.        if(((i + 1) % WA) == 0)
  92.           printf("\n");
  93.     }
  94.     printf("\n");
  95.  
  96.     // 7. clean up memory
  97.     free(h_A);
  98.     cudaFree(d_A);
  99.  
  100. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement