Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2017
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. //nvcc heat_2d.cu -o heat_2d -Xlinker -framework,OpenGL,-framework,GLUT -I../common
  2.  
  3. #include "cuda.h"
  4. #include "common/book.h"
  5. #include "common/cpu_anim.h"
  6.  
  // Board edge length in cells; the board is DIM x DIM.
  #define DIM 64

  // Edge length, in screen pixels, of the square drawn for one cell.
  #define PIXELS_PER_CELL 8

  // Device-side 2D texture references over the two cell buffers
  // (bound to dev_inSrc / dev_outSrc in main). One char per cell.
  texture<char, 2, cudaReadModeElementType> texIn;
  texture<char, 2, cudaReadModeElementType> texOut;
  14.  
  // Computes one Game of Life generation.
  //
  // Launch layout: 2D grid of 2D blocks, one thread per cell, covering at
  // least DIM x DIM threads. Threads outside the board exit immediately.
  //
  //   dst    - device buffer that receives the next generation, one char per
  //            cell (1 = alive, 0 = dead), row stride DIM
  //   dstOut - non-zero: the current generation is read through texIn;
  //            zero:     the current generation is read through texOut
  //
  // Neighbours that fall outside the board are treated as dead. They are
  // skipped explicitly instead of being fetched, because a texture reference
  // with the default (clamp) address mode would return the nearest edge cell
  // for an out-of-range coordinate and border cells would count those
  // duplicates as extra neighbours.
  __global__ void blend_kernel( char *dst, char dstOut ) {
      // map from threadIdx/BlockIdx to cell position
      int x = threadIdx.x + blockIdx.x * blockDim.x;
      int y = threadIdx.y + blockIdx.y * blockDim.y;
      if( x >= DIM || y >= DIM ) {
          return;
      }
      // Same row stride the textures were bound with and bool_to_color reads with.
      int offset = x + y * DIM;

      char c = 0;      // state of this cell in the current generation
      int  count = 0;  // number of live neighbours, ranges from 0 - 8

      for( int dy = -1; dy <= 1; dy++ ) {
          for( int dx = -1; dx <= 1; dx++ ) {
              int nx = x + dx;
              int ny = y + dy;
              if( nx < 0 || nx >= DIM || ny < 0 || ny >= DIM ) {
                  continue;  // off-board neighbour counts as dead
              }
              char v = dstOut ? tex2D( texIn, nx, ny )
                              : tex2D( texOut, nx, ny );
              if( dx == 0 && dy == 0 ) {
                  c = v;
              } else if( v ) {
                  // Any non-zero byte is one live neighbour, matching how
                  // the centre cell is tested below.
                  count++;
              }
          }
      }

      // A live cell survives with 2 or 3 live neighbours; a dead cell is
      // born with exactly 3.
      bool alive = c ? ( count == 2 || count == 3 ) : ( count == 3 );
      dst[offset] = alive ? 1 : 0;
  }
  65.  
  // State shared between main and the per-frame / exit callbacks.
  struct DataBlock {
      // Device buffer holding the rendered RGBA pixels (filled by bool_to_color).
      unsigned char   *output_bitmap;
      // The two device cell buffers; anim_gpu alternates which one is written.
      char            *dev_inSrc;
      char            *dev_outSrc;
      // Host-side bitmap the rendered pixels are copied into.
      CPUAnimBitmap   *bitmap;

      // Events recorded at the start and end of each frame for timing.
      cudaEvent_t      start;
      cudaEvent_t      stop;
      // Accumulated frame time in milliseconds and number of frames rendered.
      float            totalTime;
      float            frames;
  };
  // Writes one RGBA pixel into a byte buffer.
  //   data    - start of the pixel buffer, 4 bytes per pixel
  //   offset  - pixel index (not byte index) of the pixel to write
  //   r,g,b,a - channel values stored in that order
  __device__ void set_color( unsigned char * data, int offset, unsigned char r, unsigned char g, unsigned char b, unsigned char a ) {
      unsigned char *pixel = data + offset * 4;
      pixel[0] = r;
      pixel[1] = g;
      pixel[2] = b;
      pixel[3] = a;
  }
  84.  
  // Paints each cell as a PIXELS_PER_CELL x PIXELS_PER_CELL square of pixels.
  //
  // Launch layout: 2D grid of 2D blocks, one thread per cell.
  //   optr   - RGBA output image, (DIM * PIXELS_PER_CELL) pixels per row
  //   outSrc - cell states, one char per cell, row stride DIM;
  //            non-zero cells are drawn white, zero cells black
  __global__ void bool_to_color( unsigned char *optr, const char *outSrc ) {
      // map from threadIdx/BlockIdx to cell position
      const int cellX = blockIdx.x * blockDim.x + threadIdx.x;
      const int cellY = blockIdx.y * blockDim.y + threadIdx.y;

      // Width of the output image in pixels, i.e. the stride between pixel rows.
      const int rowPixels = DIM * PIXELS_PER_CELL;
      // Pixel index of the top-left corner of this cell's square.
      const int topLeft = ( cellX * PIXELS_PER_CELL ) + ( ( cellY * PIXELS_PER_CELL ) * rowPixels );

      const unsigned char shade = outSrc[cellX + cellY * DIM] ? 255 : 0;

      for( int col = 0; col < PIXELS_PER_CELL; col++ ) {
          for( int row = 0; row < PIXELS_PER_CELL; row++ ) {
              set_color( optr, topLeft + col + ( row * rowPixels ), shade, shade, shade, 255 );
          }
      }
  }
  103.  
  104.  
  // Ping-pong selector. Non-zero: the next generation is read through texIn
  // and written to dev_outSrc; zero: read through texOut and written to
  // dev_inSrc. Flipped once per frame by anim_gpu.
  volatile char dstOut = true;

  // Per-frame callback (registered with CPUAnimBitmap::anim_and_exit in main).
  // Advances the board by one generation, renders it to pixels, copies the
  // pixels into the host bitmap and prints the running average frame time.
  //   d     - shared state: device buffers, host bitmap, timing events
  //   ticks - not used by this function
  void anim_gpu( DataBlock *d, int ticks ) {
      (void)ticks;

      /* Start Timer */
      HANDLE_ERROR( cudaEventRecord( d->start, 0 ) );

      /* one thread per cell, 16x16 threads per block */
      dim3    blocks(DIM/16,DIM/16);
      dim3    threads(16,16);
      CPUAnimBitmap  *bitmap = d->bitmap;

      /* figure out which buffer to write to; read the volatile flag once so
         the buffer choice and the kernel argument cannot disagree */
      const char readFromIn = dstOut;
      char * out = readFromIn ? d->dev_outSrc : d->dev_inSrc;

      /* process state */
      blend_kernel<<<blocks,threads>>>( out, readFromIn );
      /* a kernel launch returns no status; a bad launch is only visible here */
      HANDLE_ERROR( cudaGetLastError() );

      /* flip buffer */
      dstOut = !readFromIn;

      /* Turn Cells into pixels */
      bool_to_color<<<blocks,threads>>>( d->output_bitmap, out );
      HANDLE_ERROR( cudaGetLastError() );

      /* Copy pixels back to host */
      HANDLE_ERROR( cudaMemcpy( bitmap->get_ptr(), d->output_bitmap, bitmap->image_size(), cudaMemcpyDeviceToHost ) );

      /* Stop Timer */
      HANDLE_ERROR( cudaEventRecord( d->stop, 0 ) );
      HANDLE_ERROR( cudaEventSynchronize( d->stop ) );
      float   elapsedTime;
      HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, d->start, d->stop ) );
      d->totalTime += elapsedTime;
      ++d->frames;
      printf( "Average Time per frame:  %3.1f ms\n", d->totalTime/d->frames  );
  }
  138.  
  // Exit callback: unbinds both textures and releases every device resource
  // allocated in main (both cell buffers, the pixel buffer and the two
  // timing events).
  //   d - shared state holding the resources to release
  void anim_exit( DataBlock *d ) {
      HANDLE_ERROR( cudaUnbindTexture( texIn ) );
      HANDLE_ERROR( cudaUnbindTexture( texOut ) );

      HANDLE_ERROR( cudaFree( d->dev_inSrc ) );
      HANDLE_ERROR( cudaFree( d->dev_outSrc ) );
      // Allocated with cudaMalloc in main as well, so it must be freed here too.
      HANDLE_ERROR( cudaFree( d->output_bitmap ) );

      HANDLE_ERROR( cudaEventDestroy( d->start ) );
      HANDLE_ERROR( cudaEventDestroy( d->stop ) );
  }
  149.  
  // Allocates a zero-filled host scratch buffer of imageSize bytes and
  // releases it again.
  //
  // NOTE(review): this function has no lasting effect. The buffer is local
  // and nothing is copied to the device, so it does not clear any cell
  // buffer. It cannot do so without access to the DataBlock, which its
  // signature does not provide.
  //
  //   imageSize - size of the scratch buffer in bytes; values <= 0 are ignored
  void _zero_matrix( int imageSize ) {
      if( imageSize <= 0 ) {
          return;
      }
      // calloc zeroes exactly imageSize bytes, so the fill can neither run
      // past the allocation nor dereference a failed (NULL) allocation.
      char *temp = (char*)calloc( (size_t)imageSize, 1 );
      free( temp );  // free(NULL) is a no-op
  }
  157.  
  // Builds the initial generation on the host and uploads it to dev_inSrc.
  //
  // The host buffer is zero-initialised, so every cell not listed in the
  // seed table starts dead (a plain malloc would upload indeterminate bytes).
  //
  //   data      - shared state; data->dev_inSrc must already be allocated
  //               with at least imageSize bytes
  //   imageSize - number of bytes to upload; cells are stored one char each
  //               at index x + y * DIM
  void _create_life( DataBlock * data, int imageSize ) {
      // Live cells of the starting pattern as { x, y } pairs.
      static const int seed[][2] = {
          // first square
          {  1,  6 }, {  2,  6 },
          {  1,  7 }, {  2,  7 },

          // little rectangle
          { 31,  2 },
          { 31,  3 },

          // second square
          { 35,  4 }, { 36,  4 },
          { 35,  5 }, { 36,  5 },

          // first glider
          { 14,  3 },
          { 13,  4 }, { 15,  4 },
          { 12,  5 }, { 16,  5 }, { 17,  5 },
          { 12,  6 }, { 16,  6 }, { 17,  6 },
          { 12,  7 }, { 16,  7 }, { 17,  7 },
          { 13,  8 }, { 15,  8 },
          { 14,  9 },

          // second glider
          { 26,  1 },
          { 23,  2 }, { 24,  2 }, { 25,  2 }, { 26,  2 },
          { 22,  3 }, { 23,  3 }, { 24,  3 }, { 25,  3 },
          { 22,  4 }, { 25,  4 },
          { 22,  5 }, { 23,  5 }, { 24,  5 }, { 25,  5 },
          { 23,  6 }, { 24,  6 }, { 25,  6 }, { 26,  6 },
          { 26,  7 },

          // first offspring
          { 23, 10 }, { 25, 10 },
          { 24, 11 }, { 25, 11 },
          { 24, 12 },

          // second offspring
          { 31, 17 },
          { 32, 18 }, { 33, 18 },
          { 31, 19 }, { 32, 19 },

          // third offspring
          { 36, 25 }, { 38, 25 },
          { 37, 26 }, { 38, 26 },
          { 37, 27 },

          // fourth offspring
          { 46, 32 },
          { 47, 33 }, { 48, 33 },
          { 46, 34 }, { 47, 34 },
      };
      const int seedCount = (int)( sizeof( seed ) / sizeof( seed[0] ) );

      if( imageSize <= 0 ) {
          return;  // nothing to upload
      }

      char *temp = (char*)calloc( (size_t)imageSize, 1 );
      if( temp == NULL ) {
          fprintf( stderr, "_create_life: out of host memory (%d bytes)\n", imageSize );
          exit( EXIT_FAILURE );
      }

      for( int i = 0; i < seedCount; i++ ) {
          int index = seed[i][0] + seed[i][1] * DIM;
          // Never write past the buffer if it is smaller than the pattern.
          if( index < imageSize ) {
              temp[index] = 1;
          }
      }

      //init the life into the first buffer
      HANDLE_ERROR( cudaMemcpy( data->dev_inSrc, temp, imageSize, cudaMemcpyHostToDevice ) );
      free( temp );
  }
  270.  
  271.  
  // Program entry point: allocates the device buffers, binds them to the two
  // textures, seeds the board and hands control to the animation loop with
  // anim_gpu as the per-frame callback and anim_exit as the exit callback.
  int main( void ) {
      DataBlock   data;
      CPUAnimBitmap bitmap( DIM * PIXELS_PER_CELL, DIM * PIXELS_PER_CELL, &data );
      data.bitmap = &bitmap;
      data.totalTime = 0;
      data.frames = 0;
      HANDLE_ERROR( cudaEventCreate( &data.start ) );
      HANDLE_ERROR( cudaEventCreate( &data.stop ) );

      int imageSize = bitmap.image_size();

      // device copy of the rendered pixels
      HANDLE_ERROR( cudaMalloc( (void**)&data.output_bitmap, imageSize ) );

      // The cell buffers store one char per cell. They are allocated with
      // imageSize bytes because _create_life uploads imageSize bytes.
      HANDLE_ERROR( cudaMalloc( (void**)&data.dev_inSrc, imageSize ) );
      HANDLE_ERROR( cudaMalloc( (void**)&data.dev_outSrc, imageSize ) );

      // cudaMalloc does not clear memory, and _zero_matrix below only touches
      // a host scratch buffer, so clear both cell buffers on the device.
      HANDLE_ERROR( cudaMemset( data.dev_inSrc, 0, imageSize ) );
      HANDLE_ERROR( cudaMemset( data.dev_outSrc, 0, imageSize ) );

      cudaChannelFormatDesc desc = cudaCreateChannelDesc<char>();

      // Each texture views the first DIM x DIM chars of its buffer, row pitch DIM bytes.
      HANDLE_ERROR( cudaBindTexture2D( NULL, texIn, data.dev_inSrc, desc, DIM, DIM, sizeof(char) * DIM ) );

      HANDLE_ERROR( cudaBindTexture2D( NULL, texOut, data.dev_outSrc, desc, DIM, DIM, sizeof(char) * DIM ) );

      //Zero the cell matrix
      _zero_matrix( imageSize );

      //Add LIFE!
      _create_life( &data, imageSize );


      bitmap.anim_and_exit( (void (*)(void*,int))anim_gpu, (void (*)(void*))anim_exit );
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement