Stybyk

kernely cuda

Dec 2nd, 2014
425
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 12.57 KB | None | 0 0
  1. // ***********************************************************************
  2. //
  3. // Demo program pro vyuku predmetu APPS (10/2021)
  4. // Petr Olivka, katedra informatiky, FEI, VSB-TU Ostrava
  5. // email:petr.olivka@vsb.cz
  6. //
  7. // Priklad pouziti CUDA technologie.
  8. // Prevod barevneho obrazku na odstiny sede barvy
  9. //
  10. // ***********************************************************************
  11.  
  12. #include <cuda_runtime.h>
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <math.h>
  16.  
  17.  
  // Kernel: nearest-neighbour enlargement of an image to twice its width and height.
  //
  // input  - source image, sizex * sizey pixels, row-major
  // output - destination image, (2 * sizex) * (2 * sizey) pixels, row-major
  // sizex, sizey - dimensions of the SOURCE image
  //
  // Expects a 2D launch with at least sizex * sizey threads; each thread handles one
  // source pixel (x, y) and surplus threads exit immediately.
  __global__ void resizebig(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {
      // thread coordinates, checked against the source image size
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      if ( y >= sizey ) return;

      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;

      const uchar4 pixel = input[y * sizex + x];
      const int outWidth = sizex * 2;

      // Replicate the source pixel into the 2x2 output square starting at (2x, 2y).
      // Offsets must be 0..1 in both directions: this way every output pixel is
      // written exactly once, no thread touches a neighbour's square and no column
      // index becomes negative for x == 0.
      for (int j = 0; j <= 1; j++)
      {
          for (int i = 0; i <= 1; i++)
          {
              output[(y * 2 + j) * outWidth + (x * 2 + i)] = pixel;
          }
      }
  }
  44.  
  45.  
  // Host wrapper: enlarges an image to twice its width and height on the GPU.
  //
  // input  - host buffer with sizex * sizey pixels
  // output - host buffer with room for (2 * sizex) * (2 * sizey) pixels
  // sizex, sizey - dimensions of the input image
  //
  // On any CUDA error a message is printed, the remaining steps are skipped and the
  // device buffers are released; output is then left untouched or incomplete.
  void run_resize_big(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      // an empty image needs no work and would otherwise produce a zero-sized grid
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t erorlog;
      // initialised so that the cudaFree calls at the end are safe on every path
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // byte counts computed in size_t so large images do not overflow int
      const size_t inBytes  = (size_t)sizex * (size_t)sizey * sizeof( uchar4 );
      const size_t outBytes = inBytes * 4;

      do
      {
          if ( ( erorlog = cudaMalloc( &CUDAinput, inBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, outBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // upload the source image to the device
          if ( ( erorlog = cudaMemcpy( CUDAinput, input, inBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // one thread per source pixel, grid rounded up to whole blocks
          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          resizebig<<< blocks,threads >>> (CUDAinput ,CUDAoutput,sizex ,sizey );

          // launch-configuration errors
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // errors raised while the kernel was executing
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // download the enlarged image
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, outBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  87.  
  88.  
  // Kernel: shrinks an image by keeping every second pixel of every second row.
  //
  // input  - source image; the index arithmetic below treats its rows as
  //          2 * sizex pixels long
  // output - destination image, sizex * sizey pixels, row-major
  // sizex, sizey - dimensions of the DESTINATION image
  //
  // One thread per destination pixel; threads outside the image do nothing.
  __global__ void resizelow(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {
      const int row = blockDim.y * blockIdx.y + threadIdx.y;
      const int col = blockDim.x * blockIdx.x + threadIdx.x;

      if ( row >= sizey || col >= sizex ) return;

      // row * 4 * sizex == (2 * row) * (2 * sizex): source row 2*row with a row
      // length of 2 * sizex; col * 2 selects source column 2*col
      const int srcIndex = (row * 4 * sizex) + col * 2;
      const int dstIndex = row * sizex + col;

      output[dstIndex] = input[srcIndex];
  }
  109.  
  110.  
  // Kernel: rotates an image by 90 degrees. Changing the index formula selects
  // rotation to the left or to the right.
  //
  // input  - source image, sizex pixels wide and sizey pixels high, row-major
  // output - destination image, sizey pixels wide and sizex pixels high, row-major
  // sizex, sizey - dimensions of the SOURCE image
  //
  // One thread per source pixel; threads outside the image do nothing.
  __global__ void kernel_rotace(uchar4 * input ,uchar4 * output ,int sizex, int sizey)
  {

      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      if ( y >= sizey ) return;

      // Rotation to the left: source column x becomes destination row
      // (sizex - 1 - x). The "- 1" keeps the row inside 0 .. sizex-1; without it
      // x == 0 writes one row past the end of output and row 0 is never filled.
      output[(sizex - 1 - x) * sizey + y] = input[y * sizex + x];

      // Rotation to the right (alternative):
      // output[x * sizey + (sizey - 1 - y)] = input[y * sizex + x];

      // Fill with a solid colour (alternative, kept for reference):
      // int position = y * sizex + x;
      // output[position].x = 255;
      // output[position].y = 0;
      // output[position].z = 0;
  }
  143.  
  144.  
  145.  
  // Host wrapper: runs kernel_rotace on an image.
  //
  // input  - host buffer with sizex * sizey pixels
  // output - host buffer receiving sizex * sizey pixels
  // sizex, sizey - dimensions of the input image
  //
  // On any CUDA error a message is printed, the remaining steps are skipped and the
  // device buffers are released; output is then left untouched or incomplete.
  void run_rotace(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      // an empty image needs no work and would otherwise produce a zero-sized grid
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t erorlog;
      // initialised so that the cudaFree calls at the end are safe on every path
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // byte count computed in size_t so large images do not overflow int
      const size_t imageBytes = (size_t)sizex * (size_t)sizey * sizeof( uchar4 );

      do
      {
          if ( ( erorlog = cudaMalloc( &CUDAinput, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // upload the source image to the device
          if ( ( erorlog = cudaMemcpy( CUDAinput, input, imageBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // one thread per source pixel, grid rounded up to whole blocks
          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          kernel_rotace <<< blocks,threads >>> (CUDAinput ,CUDAoutput,sizex ,sizey );

          // launch-configuration errors
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // errors raised while the kernel was executing
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // download the rotated image
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, imageBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  187.  
  188.  
  189.  
  190.  
  // Host wrapper: shrinks an image to half its width and height on the GPU.
  //
  // input  - host buffer with sizex * sizey pixels
  // output - host buffer with room for (sizex / 2) * (sizey / 2) pixels
  // sizex, sizey - dimensions of the input image
  //
  // On any CUDA error a message is printed, the remaining steps are skipped and the
  // device buffers are released; output is then left untouched or incomplete.
  void run_resize_low(uchar4 *input ,uchar4* output ,int sizex ,int sizey){

      const int outW = sizex / 2;
      const int outH = sizey / 2;

      // images narrower or lower than 2 pixels have an empty result; returning here
      // also avoids a zero-sized grid
      if ( outW <= 0 || outH <= 0 ) return;

      cudaError_t erorlog;
      // initialised so that the cudaFree calls at the end are safe on every path
      uchar4 * CUDAinput = NULL;
      uchar4 * CUDAoutput = NULL;

      // The kernel indexes the source with a row length of exactly 2 * outW pixels.
      // For an odd sizex that differs from the real row length, so only the even-sized
      // region (2 * outW) x (2 * outH) is uploaded, tightly packed.
      const size_t srcPitch    = (size_t)sizex * sizeof( uchar4 );
      const size_t packedPitch = (size_t)outW * 2 * sizeof( uchar4 );
      const size_t packedRows  = (size_t)outH * 2;
      const size_t outBytes    = (size_t)outW * (size_t)outH * sizeof( uchar4 );

      do
      {
          if ( ( erorlog = cudaMalloc( &CUDAinput, packedPitch * packedRows ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          if ( ( erorlog = cudaMalloc( &CUDAoutput, outBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // upload the source rows, dropping the last column/row of odd-sized images
          erorlog = cudaMemcpy2D( CUDAinput, packedPitch, input, srcPitch,
                                  packedPitch, packedRows, cudaMemcpyHostToDevice );
          if ( erorlog != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // one thread per OUTPUT pixel, grid rounded up to whole blocks
          int block = 16;
          dim3 blocks( ( outW + block - 1 ) / block, ( outH + block - 1 ) / block );
          dim3 threads( block, block );

          resizelow <<< blocks,threads >>> (CUDAinput ,CUDAoutput,outW ,outH );

          // launch-configuration errors
          if ( ( erorlog = cudaGetLastError() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // errors raised while the kernel was executing
          if ( ( erorlog = cudaDeviceSynchronize() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }

          // download the reduced image
          if ( ( erorlog = cudaMemcpy( output, CUDAoutput, outBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( erorlog ) );
              break;
          }
      } while ( 0 );

      cudaFree( CUDAinput );
      cudaFree( CUDAoutput );

  }
  232.  
  233.  
  234.  
  235.  
  236.  
  237.  
  238.  
  239.  
  240.  
  241.  
  242.  
  243.  
  244.  
  245.  
  246.  
  247.  
  248.  
  249.  
  250.  
  251.  
  252.  
  253.  
  254.  
  255.  
  256.  
  257.  
  258.  
  // Demo kernel: derives two images from one colour image.
  //
  // color_pic - source image, sizex * sizey pixels, row-major
  // bw_pic    - receives the pixel with x, y and z all set to the weighted sum
  //             0.11 * x + 0.59 * y + 0.30 * z of the source pixel
  // MyO       - receives the pixel with x and z scaled by 0.75 and y scaled by 0.1
  // sizex, sizey - image dimensions
  //
  // The w component is copied unchanged into both results. One thread per pixel;
  // the thread grid may be larger than the image, surplus threads do nothing.
  __global__ void kernel_grayscale( uchar4 *color_pic, uchar4* bw_pic, uchar4* MyO ,int sizex, int sizey )
  {
      const int row = blockDim.y * blockIdx.y + threadIdx.y;
      const int col = blockDim.x * blockIdx.x + threadIdx.x;
      if ( row >= sizey || col >= sizex ) return;

      const int idx = row * sizex + col;
      const uchar4 src = color_pic[ idx ];

      // The literals are double, so the sum is evaluated in double precision and
      // truncated when stored into the unsigned char.
      const unsigned char level = src.x * 0.11 + src.y * 0.59 + src.z * 0.30;

      // all three colour components share the same value
      uchar4 gray = src;
      gray.x = level;
      gray.y = level;
      gray.z = level;
      bw_pic[ idx ] = gray;

      // second variant: each component scaled by its own factor
      uchar4 tinted = src;
      tinted.x = src.x * 0.75;
      tinted.y = src.y * 0.1;
      tinted.z = src.z * 0.75;
      MyO[ idx ] = tinted;
  }
  287.  
  288.  
  289.  
  290.  
  291.  
  292.  
  293.  
  294.  
  295.  
  296.  
  297.  
  298.  
  299.  
  300.  
  301.  
  302.  
  303.  
  304.  
  305.  
  306.  
  307.  
  308.  
  309.  
  310.  
  311.  
  312.  
  313.  
  314.  
  315.  
  316.  
  // Host wrapper: runs kernel_grayscale on an image and fetches both results.
  //
  // color_pic - host buffer with the sizex * sizey source pixels
  // bw_pic    - host buffer receiving the first kernel result (sizex * sizey pixels)
  // MyO       - host buffer receiving the second kernel result (sizex * sizey pixels)
  // sizex, sizey - image dimensions
  //
  // On any CUDA error a message is printed, the remaining steps are skipped and the
  // device buffers are released; the outputs are then left untouched or incomplete.
  void run_grayscale( uchar4 *color_pic, uchar4* bw_pic, uchar4* MyO, int sizex, int sizey )
  {
      // an empty image needs no work and would otherwise produce a zero-sized grid
      if ( sizex <= 0 || sizey <= 0 ) return;

      cudaError_t cerr;
      // device buffers; initialised so that the cudaFree calls at the end are safe
      // on every path
      uchar4 *cudaColorPic = NULL;
      uchar4 *cudaBWPic = NULL;
      uchar4 *CudaMyO = NULL;

      // byte count computed in size_t so large images do not overflow int
      const size_t imageBytes = (size_t)sizex * (size_t)sizey * sizeof( uchar4 );

      do
      {
          // allocate device memory
          if ( ( cerr = cudaMalloc( &CudaMyO, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaColorPic, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaBWPic, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // upload the colour image to the device
          if ( ( cerr = cudaMemcpy( cudaColorPic, color_pic, imageBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          int block = 16;
          dim3 blocks( ( sizex + block - 1 ) / block, ( sizey + block - 1 ) / block );
          dim3 threads( block, block );

          // launch the blocks of threads; the thread grid may be larger than the image
          kernel_grayscale<<< blocks, threads >>>( cudaColorPic, cudaBWPic,CudaMyO, sizex, sizey );

          // launch-configuration errors
          if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // download both results from the device
          if ( ( cerr = cudaMemcpy( bw_pic, cudaBWPic, imageBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMemcpy( MyO, CudaMyO, imageBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }
      } while ( 0 );

      // release the device memory
      cudaFree( cudaColorPic );
      cudaFree( cudaBWPic );
      cudaFree( CudaMyO);

  }
  376.  
  377.  
  378.  
  // Kernel: horizontal cross-fade of two images of equal size.
  //
  // first, second - source images, sizex * sizey pixels each, row-major
  // final         - destination image, sizex * sizey pixels, row-major
  // sizex, sizey  - image dimensions
  //
  // The weight of `first` is x / sizex and the weight of `second` is the remainder
  // to 1, so the left edge shows `second` and the result fades towards `first` on
  // the right. One thread per pixel; threads outside the image do nothing.
  __global__ void prolnuti( uchar4 *first,uchar4 *second,uchar4 *final, int sizex, int sizey )
  {
      // thread coordinates
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;
      if ( y >= sizey ) return;

      float perc2 = x / (float)sizex;

      // read each source pixel once as a whole uchar4
      const int idx = y * sizex + x;
      const uchar4 a = first[idx];
      const uchar4 b = second[idx];

      uchar4 mixed;
      mixed.x = a.x * perc2 + b.x * (1 - perc2);
      mixed.y = a.y * perc2 + b.y * (1 - perc2);
      mixed.z = a.z * perc2 + b.z * (1 - perc2);
      // w is blended as well so that no component of the destination pixel is left
      // holding whatever the freshly allocated buffer happened to contain
      mixed.w = a.w * perc2 + b.w * (1 - perc2);

      final[idx] = mixed;
  }
  397.  
  // Host wrapper: cross-fades two images of equal size with the prolnuti kernel.
  //
  // original, original2 - host buffers with width * height pixels each
  // joined              - host buffer receiving width * height pixels
  // width, height       - image dimensions
  //
  // On any CUDA error a message is printed, the remaining steps are skipped and the
  // device buffers are released; joined is then left untouched or incomplete.
  void joinImage( uchar4 *original, uchar4 *original2, uchar4 *joined, int width, int height )
  {
      // an empty image needs no work and would otherwise produce a zero-sized grid
      if ( width <= 0 || height <= 0 ) return;

      cudaError_t cerr;
      // device buffers; initialised so that the cudaFree calls at the end are safe
      // on every path
      uchar4 *cudaOriginal = NULL;
      uchar4 *cudaOriginal2 = NULL;
      uchar4 *cudajoined = NULL;

      // byte count computed in size_t so large images do not overflow int
      const size_t imageBytes = (size_t)width * (size_t)height * sizeof( uchar4 );

      do
      {
          // allocate device memory
          if ( ( cerr = cudaMalloc( &cudaOriginal, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudaOriginal2, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMalloc( &cudajoined, imageBytes ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // upload both source images to the device
          if ( ( cerr = cudaMemcpy( cudaOriginal, original, imageBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          if ( ( cerr = cudaMemcpy( cudaOriginal2, original2, imageBytes, cudaMemcpyHostToDevice ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          int block = 16;
          dim3 blocks( ( width + block - 1 ) / block, ( height + block - 1 ) / block );
          dim3 threads( block, block );

          // launch the blocks of threads; the thread grid may be larger than the image
          prolnuti<<< blocks, threads >>>( cudaOriginal,cudaOriginal2, cudajoined, width, height );

          // launch-configuration errors
          if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // download the blended image from the device
          if ( ( cerr = cudaMemcpy( joined, cudajoined, imageBytes, cudaMemcpyDeviceToHost ) ) != cudaSuccess )
          {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }
      } while ( 0 );

      // release the device memory
      cudaFree( cudajoined );
      cudaFree( cudaOriginal );
      cudaFree( cudaOriginal2 );

  }
Add Comment
Please, Sign In to add comment