lu4kedr

edited2

Dec 7th, 2016
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.15 KB | None | 0 0
  1. // ***********************************************************************
  2. //
  3. // Demo program for education in subject
  4. // Computer Architectures and Parallel Systems.
  5. // Petr Olivka, dep. of Computer Science, FEI, VSB-TU Ostrava
  6. //
  7. // Example of CUDA Technology Usage.
  8. // Manipulation with prepared image.
  9. //
  10. // ***********************************************************************
  11.  
  12. #include <cuda_runtime.h>
  13. #include <device_launch_parameters.h>
  14. #include <stdio.h>
  15. #include "pic_type.h"
  16.  
  // Composes one output pixel per thread: the small picture `maly` is drawn over
  // the big picture `velky` with its top-left corner at `pozice`, and the result
  // is stored in `vysledek`.
  //
  // Launch layout: 2D grid of 2D blocks covering velky.Size; surplus threads in
  // the grid tail are discarded by the guards below.
  // NOTE(review): the output is indexed with vysledek.Size.x as row stride but
  // guarded by velky.Size - this assumes both pictures have the same size;
  // confirm at the caller.
  __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, CUDA_Pic vysledek, int2 pozice) {
      // X,Y coordinates of this thread's pixel in the big picture
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= velky.Size.x ) return;
      if ( y >= velky.Size.y ) return;

      // The same pixel expressed relative to the small picture's origin
      int mx = x - pozice.x;
      int my = y - pozice.y;

      // Half-open range [0, Size): the previous inclusive upper bound read one
      // column and one row past the end of the small picture.
      uchar4 bgr;
      if ( mx >= 0 && my >= 0 && mx < maly.Size.x && my < maly.Size.y )
          bgr = maly.PData[ my * maly.Size.x + mx ];
      else
          bgr = velky.PData[ y * velky.Size.x + x ];

      // Store point [x,y] to the result image
      vysledek.PData[ y * vysledek.Size.x + x ] = bgr;
  }
  34.  
  // Prepares the device side of the animation: copies the sizes of the three host
  // pictures into the CUDA* descriptors, allocates one device buffer per picture
  // and uploads the host pixels into them.
  //
  // velky / maly / vysledek           - host pictures (Size + PData are read)
  // CUDAvelky / CUDAmaly / CUDAvysledek - receive Size and the device pointer
  //
  // Errors are reported with printf and processing continues; a descriptor whose
  // allocation failed keeps PData == NULL.
  void run_startAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek) {
      cudaError_t cerr;

      CUDAvelky.Size = velky.Size;
      CUDAmaly.Size = maly.Size;
      CUDAvysledek.Size = vysledek.Size;

      // Start from NULL so a failed allocation never leaves a stale pointer behind
      CUDAvelky.PData = NULL;
      CUDAmaly.PData = NULL;
      CUDAvysledek.PData = NULL;

      // Byte sizes are computed in size_t to avoid overflow in the x * y product
      const size_t velkyBytes = ( size_t ) CUDAvelky.Size.x * CUDAvelky.Size.y * sizeof( uchar4 );
      const size_t malyBytes = ( size_t ) CUDAmaly.Size.x * CUDAmaly.Size.y * sizeof( uchar4 );
      const size_t vysledekBytes = ( size_t ) CUDAvysledek.Size.x * CUDAvysledek.Size.y * sizeof( uchar4 );

      // Memory allocation in GPU device
      cerr = cudaMalloc( &CUDAvelky.PData, velkyBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMalloc( &CUDAmaly.PData, malyBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMalloc( &CUDAvysledek.PData, vysledekBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data to GPU device. Each call is checked through its own return
      // value; cudaGetLastError() could instead surface an older, unrelated error.
      cerr = cudaMemcpy( CUDAvelky.PData, velky.PData, velkyBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMemcpy( CUDAmaly.PData, maly.PData, malyBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMemcpy( CUDAvysledek.PData, vysledek.PData, vysledekBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  }
  73.  
  // Renders one animation frame: launches the overlay kernel on the device
  // pictures and copies the composed result back into the host picture
  // `vysledek`.
  //
  // velky / maly  - unused here, kept for interface compatibility
  // CUDAvelky / CUDAmaly / CUDAvysledek - device pictures prepared beforehand
  // block_size    - threads per block in x and y
  // pozice        - top-left corner of the small picture inside the big one
  //
  // The result buffer is no longer aliased to the big picture: overwriting
  // CUDAvysledek.PData through the reference leaked the original allocation and
  // led to the same pointer being freed twice later.
  // NOTE(review): assumes CUDAvysledek has the same size as CUDAvelky, since the
  // kernel guards its writes with the big picture's size - confirm at caller.
  void run_prubehAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek, uint2 block_size, int2 pozice) {
      cudaError_t cerr;

      // Grid creation with computed organization. The kernel produces one pixel
      // of the big picture per thread, so the grid must cover the big picture.
      dim3 vlakna( block_size.x, block_size.y );
      dim3 mrizka( ( CUDAvelky.Size.x + block_size.x - 1 ) / block_size.x, ( CUDAvelky.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_vlozeni<<< mrizka, vlakna >>>( CUDAvelky, CUDAmaly, CUDAvysledek, pozice );

      // Launch configuration errors are only visible through cudaGetLastError()
      if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data from GPU device to PC
      cerr = cudaMemcpy( vysledek.PData, CUDAvysledek.PData, ( size_t ) vysledek.Size.x * vysledek.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  }
  88.  
  // Releases the device buffers of the three animation pictures.
  // Failures of cudaFree are reported with printf.
  void run_konecAnimace(CUDA_Pic CUDAvelky, CUDA_Pic CUDAmaly, CUDA_Pic CUDAvysledek) {
      cudaError_t cerr;

      cerr = cudaFree( CUDAvelky.PData );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaFree( CUDAmaly.PData );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // The result descriptor may point at one of the buffers released above;
      // free it only when it owns a distinct allocation to avoid a double free.
      if ( CUDAvysledek.PData != CUDAvelky.PData && CUDAvysledek.PData != CUDAmaly.PData ) {
          cerr = cudaFree( CUDAvysledek.PData );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      }
  }
  95.  
  96.  
  97.  
  98.  
  99.  
  100.  
  101.  
  102.  
  103.  
  104.  
  105.  
  106.  
  107.  
  108.  
  109.  
  110.  
  111.  
  112.  
  113.  
  114.  
  115.  
  116.  
  117.  
  118.  
  119.  
  120.  
  121.  
  122.  
  123. /*__global__ void kernel_rozmazani( CUDA_Pic stary, CUDA_Pic novy) {
  124. // X,Y coordinates
  125. int y = blockDim.y * blockIdx.y + threadIdx.y + 1;
  126. int x = blockDim.x * blockIdx.x + threadIdx.x + 1;
  127. if ( x > stary.Size.x - 1 ) return;
  128. if ( y > stary.Size.y - 1 ) return;
  129.  
  130. // zmenseni
  131.  
  132.  
  133. // Store point [x,y] back to image
  134. novy.PData[ y * novy.Size.x + x ] = bgr;
  135.  
  136. }
  137.  
  138. void run_rozmazani(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
  139. cudaError_t cerr;
  140.  
  141. CUDA_Pic staryPic;
  142. staryPic.Size = stary.Size;
  143. CUDA_Pic novyPic;
  144. novyPic.Size = novy.Size;
  145.  
  146. // Memory allocation in GPU device
  147. cerr = cudaMalloc( &staryPic.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ) );
  148. if ( cerr != cudaSuccess )
  149. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  150.  
  151. cerr = cudaMalloc( &novyPic.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ) );
  152. if ( cerr != cudaSuccess )
  153. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  154.  
  155. // Copy data to GPU device
  156. cerr = cudaMemcpy( staryPic.PData, stary.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
  157. if ( cerr != cudaSuccess )
  158. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  159.  
  160. cerr = cudaMemcpy( novyPic.PData, novy.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
  161. if ( cerr != cudaSuccess )
  162. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  163.  
  164. // Grid creation with computed organization
  165. dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
  166. kernel_rozmazani<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic );
  167.  
  168. if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
  169. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  170.  
  171. // Copy data from GPU device to PC
  172. cerr = cudaMemcpy( stary.PData, staryPic.PData, stary.Size.x * stary.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
  173. if ( cerr != cudaSuccess )
  174. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  175.  
  176. cerr = cudaMemcpy( novy.PData, novyPic.PData, novy.Size.x * novy.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
  177. if ( cerr != cudaSuccess )
  178. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  179.  
  180. // Free memory
  181. cudaFree( staryPic.PData );
  182. cudaFree( novyPic.PData );
  183.  
  184. }*/
  185.  
  // Copies the small picture `maly` into the big picture `velky`, one thread per
  // pixel of the small picture, with the small picture's top-left corner placed
  // at `pozice`.
  //
  // Launch layout: 2D grid of 2D blocks covering at least maly.Size.
  // Pixels that would land outside the big picture are skipped, so the small
  // picture may overhang any edge (including a negative `pozice`).
  __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, int2 pozice) {
      // X,Y coordinates inside the small picture
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= maly.Size.x ) return;
      if ( y >= maly.Size.y ) return;

      // Destination coordinates inside the big picture
      int cx = x + pozice.x;
      int cy = y + pozice.y;

      // Without this guard an overhanging small picture wrote outside the
      // big picture's buffer (or wrapped into a neighbouring row).
      if ( cx < 0 || cy < 0 ) return;
      if ( cx >= velky.Size.x ) return;
      if ( cy >= velky.Size.y ) return;

      // Store point [x,y] of the small picture into the big one
      velky.PData[ cy * velky.Size.x + cx ] = maly.PData[ y * maly.Size.x + x ];
  }
  201.  
  // Inserts the host picture `novy` into the host picture `stary` at `pozice`
  // using the GPU: both pictures are uploaded, the insertion kernel is run and
  // both pictures are downloaded again.
  //
  // stary      - big (destination) picture, modified in place
  // novy       - small picture to insert
  // block_size - threads per block in x and y
  // pozice     - top-left corner of `novy` inside `stary`
  //
  // The first failing CUDA call is reported with printf and aborts the
  // remaining steps; device memory is released in every case.
  void run_vlozeni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size, int2 pozice) {
      cudaError_t cerr;

      CUDA_Pic staryPic;
      staryPic.Size = stary.Size;
      staryPic.PData = NULL;   // keeps the final cudaFree safe if allocation fails
      CUDA_Pic novyPic;
      novyPic.Size = novy.Size;
      novyPic.PData = NULL;

      const size_t staryBytes = ( size_t ) stary.Size.x * stary.Size.y * sizeof( uchar4 );
      const size_t novyBytes = ( size_t ) novy.Size.x * novy.Size.y * sizeof( uchar4 );

      do {
          // Memory allocation in GPU device
          cerr = cudaMalloc( &staryPic.PData, staryBytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMalloc( &novyPic.PData, novyBytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data to GPU device
          cerr = cudaMemcpy( staryPic.PData, stary.PData, staryBytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMemcpy( novyPic.PData, novy.PData, novyBytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Grid creation with computed organization. The kernel runs one thread
          // per pixel of the inserted picture, so the grid is sized from it.
          dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
          kernel_vlozeni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic, pozice );

          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data from GPU device to PC
          cerr = cudaMemcpy( stary.PData, staryPic.PData, staryBytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMemcpy( novy.PData, novyPic.PData, novyBytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      } while ( 0 );

      // Free memory (cudaFree accepts NULL)
      cudaFree( staryPic.PData );
      cudaFree( novyPic.PData );
  }
  249.  
  // Shrinks picture `stary` into picture `novy` by sampling: output pixel [x,y]
  // takes the source pixel at [2x+1, 2y+1]. One thread per output pixel.
  //
  // Launch layout: 2D grid of 2D blocks covering at least novy.Size.
  // Source coordinates are clamped to the last row/column of `stary`, so an
  // output picture slightly larger than half the source stays within bounds.
  __global__ void kernel_zmenseni( CUDA_Pic stary, CUDA_Pic novy) {
      // X,Y coordinates in the output picture
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;

      // The guard must use the output picture: threads in the grid tail
      // previously wrote past the end of novy's rows and buffer.
      if ( x >= novy.Size.x ) return;
      if ( y >= novy.Size.y ) return;

      // Nothing to sample from an empty source
      if ( stary.Size.x < 1 || stary.Size.y < 1 ) return;

      // Source pixel for the reduction, kept inside the source picture
      int sx = min( 2 * x + 1, ( int ) stary.Size.x - 1 );
      int sy = min( 2 * y + 1, ( int ) stary.Size.y - 1 );
      uchar4 bgr = stary.PData[ sy * stary.Size.x + sx ];

      // Store point [x,y] to the output image
      novy.PData[ y * novy.Size.x + x ] = bgr;
  }
  264.  
  // Shrinks the host picture `stary` into the host picture `novy` on the GPU:
  // both pictures are uploaded, the reduction kernel is launched over the output
  // picture and both pictures are downloaded again.
  //
  // stary      - source picture
  // novy       - destination picture (its Size decides the launch grid)
  // block_size - threads per block in x and y
  //
  // The first failing CUDA call is reported with printf and aborts the
  // remaining steps; device memory is released in every case.
  void run_zmenseni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
      cudaError_t cerr;

      CUDA_Pic staryPic;
      staryPic.Size = stary.Size;
      staryPic.PData = NULL;   // keeps the final cudaFree safe if allocation fails
      CUDA_Pic novyPic;
      novyPic.Size = novy.Size;
      novyPic.PData = NULL;

      const size_t staryBytes = ( size_t ) stary.Size.x * stary.Size.y * sizeof( uchar4 );
      const size_t novyBytes = ( size_t ) novy.Size.x * novy.Size.y * sizeof( uchar4 );

      do {
          // Memory allocation in GPU device
          cerr = cudaMalloc( &staryPic.PData, staryBytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMalloc( &novyPic.PData, novyBytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data to GPU device
          cerr = cudaMemcpy( staryPic.PData, stary.PData, staryBytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMemcpy( novyPic.PData, novy.PData, novyBytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Grid creation with computed organization (one thread per output pixel)
          dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
          kernel_zmenseni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic );

          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data from GPU device to PC
          cerr = cudaMemcpy( stary.PData, staryPic.PData, staryBytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          cerr = cudaMemcpy( novy.PData, novyPic.PData, novyBytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      } while ( 0 );

      // Free memory (cudaFree accepts NULL)
      cudaFree( staryPic.PData );
      cudaFree( novyPic.PData );
  }
  312.  
  313.  
  314.  
  315.  
  316.  
  317.  
  318.  
  319.  
  320.  
  321.  
  322.  
  323.  
  324.  
  325.  
  326.  
  327.  
  328.  
  329.  
  330.  
  331.  
  332.  
  333.  
  334.  
  335.  
  336.  
  337.  
  338.  
  339.  
  340.  
  341.  
  342.  
  343.  
  344.  
  345.  
  346.  
  347.  
  348.  
  349.  
  // Mirrors the picture in place. Each participating thread swaps one pixel
  // with its mirror image:
  //   hor_ver == 0 : swap [x,y] with [width-1-x, y]   (left <-> right)
  //   otherwise    : swap [x,y] with [x, height-1-y]  (top  <-> bottom)
  // Only threads in the first half of the mirrored axis do the swap, so every
  // pair is exchanged exactly once; the middle column/row of an odd-sized
  // picture stays where it is.
  //
  // Launch layout: 2D grid of 2D blocks covering at least the picture size.
  __global__ void kernel_flip( CUDA_Pic cuda_pic, int hor_ver) {
      // X,Y coordinates
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;

      int sirka = cuda_pic.Size.x;
      int vyska = cuda_pic.Size.y;
      if ( x >= sirka ) return;
      if ( y >= vyska ) return;

      // Index of this thread's pixel and of its mirror partner.
      // The partner is at size-1-coordinate; using size-coordinate addressed
      // the first pixel of the next row (or a row past the end of the image).
      int tento = y * sirka + x;
      int protejsi;
      if ( hor_ver == 0 ) {
          if ( x >= sirka / 2 ) return;
          protejsi = y * sirka + ( sirka - 1 - x );
      }
      else {
          if ( y >= vyska / 2 ) return;
          protejsi = ( vyska - 1 - y ) * sirka + x;
      }

      // Swap the two pixels
      uchar4 bgr = cuda_pic.PData[ tento ];
      uchar4 bgr2 = cuda_pic.PData[ protejsi ];
      cuda_pic.PData[ tento ] = bgr2;
      cuda_pic.PData[ protejsi ] = bgr;
  }
  384.  
  // Mirrors the host picture `pic` in place on the GPU: uploads it, runs the
  // flip kernel and downloads the result.
  //
  // pic        - picture to mirror (host memory)
  // block_size - threads per block in x and y
  // flip       - passed to the kernel as hor_ver (0 selects the left/right swap)
  //
  // The first failing CUDA call is reported with printf and aborts the
  // remaining steps; device memory is released in every case.
  void run_flip( CUDA_Pic pic, uint2 block_size, int flip) {
      cudaError_t cerr;

      CUDA_Pic cudaPic;
      cudaPic.Size = pic.Size;
      cudaPic.PData = NULL;   // keeps the final cudaFree safe if allocation fails

      const size_t bytes = ( size_t ) pic.Size.x * pic.Size.y * sizeof( uchar4 );

      do {
          // Memory allocation in GPU device
          cerr = cudaMalloc( &cudaPic.PData, bytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data to GPU device
          cerr = cudaMemcpy( cudaPic.PData, pic.PData, bytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Grid creation with computed organization
          dim3 mrizka( ( cudaPic.Size.x + block_size.x - 1 ) / block_size.x, ( cudaPic.Size.y + block_size.y - 1 ) / block_size.y );
          kernel_flip<<< mrizka, dim3( block_size.x, block_size.y ) >>>( cudaPic, flip );

          // A bad launch configuration is only reported through cudaGetLastError()
          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data from GPU device to PC
          cerr = cudaMemcpy( pic.PData, cudaPic.PData, bytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      } while ( 0 );

      // Free memory (cudaFree accepts NULL)
      cudaFree( cudaPic.PData );
  }
  412.  
  413.  
  414.  
  415.  
  416.  
  417.  
  418.  
  419.  
  420.  
  421.  
  422.  
  423.  
  424.  
  425.  
  426.  
  427.  
  428.  
  429.  
  430.  
  431.  
  432.  
  433.  
  434.  
  435.  
  436.  
  437.  
  438.  
  439.  
  440.  
  // Every thread identifies its position in the grid and in its block and
  // modifies one pixel of the image.
  //
  // Inside each block the threads that fall into a region around the block
  // centre (decided by the test on px/py below) rotate the first three colour
  // components of their pixel; all other pixels are written back unchanged.
  // Note: a block with blockDim.y < 2 makes `halfH` zero, which is then used
  // as a divisor.
  __global__ void kernel_animation( CUDA_Pic cuda_pic )
  {
      // Global pixel coordinates handled by this thread
      int col = blockDim.x * blockIdx.x + threadIdx.x;
      int row = blockDim.y * blockIdx.y + threadIdx.y;
      if ( col >= cuda_pic.Size.x || row >= cuda_pic.Size.y ) return;

      // Pixel [col,row] of the image
      uchar4 *pixel = &cuda_pic.PData[ row * cuda_pic.Size.x + col ];
      uchar4 bgr = *pixel;

      // Distance of the thread from the block centre in each axis
      int halfW = blockDim.x / 2;
      int halfH = blockDim.y / 2;
      int px = __sad( halfW, threadIdx.x, 0 ); // abs function
      int py = __sad( halfH, threadIdx.y, 0 );

      // Colour rotation inside the block: x <- y, y <- z, z <- x
      if ( px < halfW * ( halfH - py ) / halfH )
      {
          unsigned char first = bgr.x;
          bgr.x = bgr.y;
          bgr.y = bgr.z;
          bgr.z = first;
      }

      // Store the pixel back to the image
      *pixel = bgr;
  }
  471.  
  // Applies the colour-rotation kernel to the host picture `pic` in place:
  // uploads the picture, launches kernel_animation over the whole image and
  // downloads the result.
  //
  // pic        - picture to modify (host memory)
  // block_size - threads per block in x and y
  //
  // The first failing CUDA call is reported with printf and aborts the
  // remaining steps; device memory is released in every case.
  void run_animation( CUDA_Pic pic, uint2 block_size )
  {
      cudaError_t cerr;

      CUDA_Pic cudaPic;
      cudaPic.Size = pic.Size;
      cudaPic.PData = NULL;   // keeps the final cudaFree safe if allocation fails

      const size_t bytes = ( size_t ) pic.Size.x * pic.Size.y * sizeof( uchar4 );

      do {
          // Memory allocation in GPU device
          cerr = cudaMalloc( &cudaPic.PData, bytes );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data to GPU device
          cerr = cudaMemcpy( cudaPic.PData, pic.PData, bytes, cudaMemcpyHostToDevice );
          if ( cerr != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Grid creation with computed organization
          dim3 mrizka( ( cudaPic.Size.x + block_size.x - 1 ) / block_size.x, ( cudaPic.Size.y + block_size.y - 1 ) / block_size.y );
          kernel_animation<<< mrizka, dim3( block_size.x, block_size.y ) >>>( cudaPic );

          if ( ( cerr = cudaGetLastError() ) != cudaSuccess ) {
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
              break;
          }

          // Copy data from GPU device to PC
          cerr = cudaMemcpy( pic.PData, cudaPic.PData, bytes, cudaMemcpyDeviceToHost );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      } while ( 0 );

      // Free memory (cudaFree accepts NULL)
      cudaFree( cudaPic.PData );

      // For device-side printf, a cudaDeviceSynchronize() would be needed here
      //cudaDeviceSynchronize();
  }
Add Comment
Please, Sign In to add comment