Guest User

Untitled

a guest
Nov 26th, 2014
180
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.53 KB | None | 0 0
  1. // ***********************************************************************
  2. //
  3. // Demo program pro vyuku predmetu APPS (10/2021)
  4. // Petr Olivka, katedra informatiky, FEI, VSB-TU Ostrava
  5. //
  6. // Priklad pouziti CUDA technologie.
  7. // Prevod barevneho obrazku na odstiny sede barvy
  8. //
  9. // ***********************************************************************
  10.  
  11. #include <cuda_runtime.h>
  12. #include <stdio.h>
  13.  
  // Demo kernel: converts one colour pixel per thread to a shade of grey.
  //
  // Launch layout: 2D grid of 2D blocks; thread (x, y) handles pixel (x, y).
  // The grid may be larger than the image - surplus threads exit immediately.
  //
  //   color_pic  input image, sizex * sizey pixels stored row by row
  //   bw_pic     output image with the same layout as color_pic
  //   sizex      image width in pixels
  //   sizey      image height in pixels
  //
  // The grey value is 0.11 * x + 0.59 * y + 0.30 * z of the input pixel,
  // evaluated in double precision and truncated to unsigned char; it is stored
  // into the x, y and z components. The w component is copied unchanged.
  // NOTE(review): the weights suggest x/y/z hold blue/green/red - confirm
  // against the caller's pixel format.
  __global__ void kernel_grayscale( uchar4 *color_pic, uchar4* bw_pic, int sizex, int sizey )
  {
      // Pixel coordinates owned by this thread.
      const int col = threadIdx.x + blockIdx.x * blockDim.x;
      const int row = threadIdx.y + blockIdx.y * blockDim.y;

      // Threads that fall outside the image have nothing to do.
      if ( row >= sizey || col >= sizex ) return;

      const int idx = row * sizex + col;
      uchar4 px = color_pic[ idx ];

      // All three colour components receive the same weighted value.
      const unsigned char gray =
          static_cast<unsigned char>( px.x * 0.11 + px.y * 0.59 + px.z * 0.30 );
      px.x = gray;
      px.y = gray;
      px.z = gray;

      // Store the pixel into the output image.
      bw_pic[ idx ] = px;
  }
  32.  
  33.  
  34.  
  35.  
  // Mirrors an image horizontally (left <-> right) into a second buffer.
  //
  // Launch layout: 2D grid of 2D blocks covering at least
  // ceil(sizex / 2) x sizey threads. Each thread swaps one pair of pixels:
  // column x and its mirror column (sizex - 1 - x) of the same row.
  //
  //   color_pic  input image, sizex * sizey pixels stored row by row
  //   newimg     output image with the same layout as color_pic
  //   sizex      image width in pixels
  //   sizey      image height in pixels
  __global__ void kernel_otoc( uchar4 *color_pic, uchar4* newimg, int sizex, int sizey )
  {
      // Thread coordinates and image bounds check.
      const int y = blockDim.y * blockIdx.y + threadIdx.y;
      const int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( y >= sizey ) return;

      // Only the left half (plus the middle column of an odd-width image)
      // does any work; the right half is written by the mirrored partner.
      if ( x >= ( sizex + 1 ) / 2 ) return;

      // Mirror column. The last valid column is sizex - 1, so column 0 pairs
      // with sizex - 1 (not with sizex, which would be outside the row).
      const int mirror = sizex - 1 - x;
      const int row = y * sizex;

      // Read both pixels before writing, so the swap stays correct even when
      // color_pic and newimg refer to the same buffer.
      const uchar4 leftPixel  = color_pic[ row + x ];
      const uchar4 rightPixel = color_pic[ row + mirror ];

      // For the middle column of an odd width x == mirror and the pixel is
      // simply copied onto itself.
      newimg[ row + x ]      = rightPixel;
      newimg[ row + mirror ] = leftPixel;
  }
  57.  
  // Copies a rectangular cut-out of an image into a tightly packed buffer.
  //
  // Launch layout: 2D grid of 2D blocks covering at least size.x x size.y
  // threads; thread (x, y) handles input pixel (x, y). Threads outside the
  // image or outside the cut-out exit without writing anything.
  //
  //   in     input image, size.x * size.y pixels stored row by row
  //   out    output buffer holding (vyrez.w - vyrez.x) pixels per row; only
  //          cut-out pixels are written, so it needs room for
  //          (vyrez.w - vyrez.x) * (vyrez.z - vyrez.y) pixels
  //   size   image width (x) and height (y) in pixels
  //   vyrez  cut-out rectangle: x = left column (inclusive),
  //          y = top row (inclusive), w = right column (exclusive),
  //          z = bottom row (exclusive)
  //
  // Parts of the cut-out that lie outside the image are never written.
  __global__ void kernel_vyrez(uchar4 *in, uchar4* out, int2 size, int4 vyrez)
  {
      const int y = blockDim.y * blockIdx.y + threadIdx.y;
      const int x = blockDim.x * blockIdx.x + threadIdx.x;

      // Stay inside the input image.
      if ( y >= size.y || x >= size.x ) return;

      // Stay inside the cut-out. The left/top edges are inclusive so that the
      // first row and first column of the cut-out are copied as well.
      if ( y < vyrez.y || y >= vyrez.z ) return;
      if ( x < vyrez.x || x >= vyrez.w ) return;

      // Coordinates of this pixel relative to the cut-out origin.
      const int cropX = x - vyrez.x;
      const int cropY = y - vyrez.y;

      // Row length of the packed output buffer.
      const int cropWidth = vyrez.w - vyrez.x;

      out[ cropWidth * cropY + cropX ] = in[ size.x * y + x ];
  }
  96.  
  // Splits an image vertically down the middle into two half-width images.
  //
  // Launch layout: 2D grid of 2D blocks covering at least sizex x sizey
  // threads; thread (x, y) handles input pixel (x, y).
  //
  //   orig     input image, sizex * sizey pixels stored row by row
  //   newimg1  receives the right half; (sizex / 2) pixels per row
  //   newimg2  receives the left half;  (sizex / 2) pixels per row
  //   sizex    input image width in pixels
  //   sizey    input image height in pixels
  //
  // Both outputs are exactly sizex / 2 pixels wide. When sizex is odd the
  // last input column has no slot in either half and is skipped.
  __global__ void kernel_roztrhni(uchar4 *orig, uchar4* newimg1, uchar4* newimg2, int sizex, int sizey)
  {
      const int y = blockDim.y * blockIdx.y + threadIdx.y;
      if ( y >= sizey ) return;
      const int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= sizex ) return;

      // Width of each output image.
      const int half = sizex / 2;
      const uchar4 pixel = orig[ y * sizex + x ];

      if ( x < half )
      {
          // Columns [0, half) form the left image.
          newimg2[ y * half + x ] = pixel;
      }
      else if ( x < 2 * half )
      {
          // Columns [half, 2 * half) form the right image, shifted so that
          // its first column lands at index 0.
          newimg1[ y * half + ( x - half ) ] = pixel;
      }
      // else: unpaired trailing column of an odd-width image - nothing to write.
  }
  114.  
  // Reports a failed CUDA runtime call; returns true when cerr is an error.
  static bool cuda_failed( cudaError_t cerr, int line )
  {
      if ( cerr == cudaSuccess ) return false;
      printf( "CUDA Error [%d] - '%s'\n", line, cudaGetErrorString( cerr ) );
      return true;
  }

  // Host driver: uploads an image to the GPU, runs kernel_vyrez to cut out a
  // rectangle and downloads the cut-out. (Despite its name, this function
  // launches only the cut-out kernel.)
  //
  //   original  host image, size.x * size.y pixels stored row by row
  //   output    host buffer for the result; must have room for
  //             (vyrez.w - vyrez.x) * (vyrez.z - vyrez.y) pixels
  //   size      image width (x) and height (y) in pixels
  //   vyrez     cut-out rectangle: x = left, y = top, w = right, z = bottom
  //
  // On any CUDA error a message is printed, the device buffers are released
  // and the function returns; the contents of output are then unspecified.
  void run_grayscale( uchar4 *original, uchar4* output, int2 size, int4 vyrez)
  {
      const int cropWidth  = vyrez.w - vyrez.x;
      const int cropHeight = vyrez.z - vyrez.y;

      // An empty image or an empty/inverted rectangle would lead to zero-sized
      // or wrapped-around allocation sizes below.
      if ( size.x <= 0 || size.y <= 0 || cropWidth <= 0 || cropHeight <= 0 )
      {
          printf( "run_grayscale: empty image or cut-out rectangle, nothing to do\n" );
          return;
      }

      // Byte counts are computed in size_t so that large images cannot
      // overflow int arithmetic.
      const size_t inBytes  = (size_t) size.x * (size_t) size.y * sizeof( uchar4 );
      const size_t outBytes = (size_t) cropWidth * (size_t) cropHeight * sizeof( uchar4 );

      // Initialised to NULL so the cleanup at the end is safe on every path.
      uchar4 *cudaIN = NULL;
      uchar4 *cudaOUT = NULL;

      // Allocate device memory; each step runs only if the previous succeeded.
      bool ok = !cuda_failed( cudaMalloc( &cudaIN, inBytes ), __LINE__ );
      ok = ok && !cuda_failed( cudaMalloc( &cudaOUT, outBytes ), __LINE__ );

      // Clear the result buffer: parts of the rectangle lying outside the
      // image are never written by the kernel and must not return garbage.
      ok = ok && !cuda_failed( cudaMemset( cudaOUT, 0, outBytes ), __LINE__ );

      // Transfer the colour image to the graphics card.
      ok = ok && !cuda_failed( cudaMemcpy( cudaIN, original, inBytes, cudaMemcpyHostToDevice ), __LINE__ );

      if ( ok )
      {
          // One thread per input pixel; the grid is rounded up, so the thread
          // matrix may be larger than the image itself.
          const int block = 16;
          dim3 threads( block, block );
          dim3 blocks( ( size.x + block - 1 ) / block, ( size.y + block - 1 ) / block );

          printf( "predpustenim %d %d\n", size.x, size.y);
          printf( "predpustenim %d %d, %d, %d\n", vyrez.x, vyrez.y, vyrez.w, vyrez.z);

          kernel_vyrez<<< blocks, threads >>>( cudaIN, cudaOUT, size, vyrez);

          // A launch does not return a status itself; fetch it explicitly.
          ok = !cuda_failed( cudaGetLastError(), __LINE__ );

          // Transfer the result back from the graphics card. This copy also
          // waits for the kernel to finish.
          ok = ok && !cuda_failed( cudaMemcpy( output, cudaOUT, outBytes, cudaMemcpyDeviceToHost ), __LINE__ );
      }

      // Release device memory (cudaFree accepts a NULL pointer).
      cudaFree( cudaIN );
      cudaFree( cudaOUT );

      if ( ok )
          printf( "OTOC OK" );
  }
Advertisement
Add Comment
Please, Sign In to add comment