lu4kedr

edited2

Dec 7th, 2016
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.15 KB | None | 0 0
  1. // ***********************************************************************
  2. //
  3. // Demo program for education in subject
  4. // Computer Architectures and Parallel Systems.
  5. // Petr Olivka, dep. of Computer Science, FEI, VSB-TU Ostrava
  6. // email:petr.olivka@vsb.cz
  7. //
  8. // Example of CUDA Technology Usage.
  9. // Manipulation with prepared image.
  10. //
  11. // ***********************************************************************
  12.  
  13. #include <cuda_runtime.h>
  14. #include <device_launch_parameters.h>
  15. #include <stdio.h>
  16. #include "pic_type.h"
  17.  
  // Composes one output pixel per thread: the small picture `maly` is laid over
  // the big picture `velky` with its top-left corner at `pozice`, and the
  // resulting pixel is written to `vysledek`.
  //
  //   velky    - background picture (read only here)
  //   maly     - picture to insert (read only here)
  //   vysledek - destination picture, addressed with the same [x,y] as `velky`
  //   pozice   - coordinates in `velky` of the top-left corner of `maly`
  //
  // Launch: 2D grid of 2D blocks covering at least velky.Size threads; threads
  // outside `velky` or outside `vysledek` exit without touching memory.
  // NOTE(review): the (int) casts assume picture dimensions fit in int - confirm
  // against the Size type declared in pic_type.h.
  __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, CUDA_Pic vysledek, int2 pozice) {
      // X,Y coordinates of this thread's pixel
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= velky.Size.x ) return;
      if ( y >= velky.Size.y ) return;
      // The store below indexes `vysledek`, so its extent has to be respected as well.
      if ( x >= vysledek.Size.x ) return;
      if ( y >= vysledek.Size.y ) return;

      // Coordinates of this pixel relative to the inserted picture.
      int mx = x - pozice.x;
      int my = y - pozice.y;

      // Half-open range [0, Size): the last valid column/row is Size - 1.
      // Signed comparison keeps a negative `pozice` (partly off-screen picture) working.
      bool uvnitr = mx >= 0 && my >= 0 && mx < (int) maly.Size.x && my < (int) maly.Size.y;

      uchar4 bgr;
      if ( uvnitr )
          bgr = maly.PData[ my * maly.Size.x + mx ];
      else
          bgr = velky.PData[ y * velky.Size.x + x ];

      // Store point [x,y] to the result image
      vysledek.PData[ y * vysledek.Size.x + x ] = bgr;
  }
  35.  
  // Prepares the GPU side of the animation: copies the picture sizes into the
  // device descriptors, allocates one device buffer per picture and uploads the
  // host pixels into those buffers.
  //
  //   velky, maly, vysledek             - host pictures (Size and PData are read)
  //   CUDAvelky, CUDAmaly, CUDAvysledek - descriptors filled in for the caller;
  //                                       Size is copied from the host picture and
  //                                       PData receives the cudaMalloc result
  //
  // Nothing is returned; every failing CUDA call is reported with printf and
  // execution continues with the next step.
  void run_startAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek) {
      cudaError_t cerr;

      CUDAvelky.Size = velky.Size;
      CUDAmaly.Size = maly.Size;
      CUDAvysledek.Size = vysledek.Size;

      // Buffer sizes in bytes; widened to size_t before multiplying so that
      // width * height cannot wrap around in 32-bit arithmetic.
      size_t velkyBytes = (size_t) CUDAvelky.Size.x * CUDAvelky.Size.y * sizeof( uchar4 );
      size_t malyBytes = (size_t) CUDAmaly.Size.x * CUDAmaly.Size.y * sizeof( uchar4 );
      size_t vysledekBytes = (size_t) CUDAvysledek.Size.x * CUDAvysledek.Size.y * sizeof( uchar4 );

      // Memory allocation in GPU device
      cerr = cudaMalloc( &CUDAvelky.PData, velkyBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMalloc( &CUDAmaly.PData, malyBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMalloc( &CUDAvysledek.PData, vysledekBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data to GPU device. The status returned by each cudaMemcpy is the
      // one that is checked, so the report always belongs to that very call.
      cerr = cudaMemcpy( CUDAvelky.PData, velky.PData, velkyBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMemcpy( CUDAmaly.PData, maly.PData, malyBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMemcpy( CUDAvysledek.PData, vysledek.PData, vysledekBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  }
  74.  
  // Produces one composed picture on the GPU and downloads it: launches the
  // 4-argument kernel_vlozeni on the device pictures and copies the device
  // result buffer into vysledek.PData.
  //
  //   velky, maly                       - host pictures (not read here)
  //   CUDAvelky, CUDAmaly, CUDAvysledek - device pictures passed to the kernel
  //   vysledek                          - host picture receiving the result pixels
  //   block_size                        - threads per block in x and y
  //   pozice                            - where `maly` is placed inside `velky`
  //
  // Nothing is returned; failing CUDA calls are reported with printf.
  void run_prubehAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek, uint2 block_size, int2 pozice) {
      cudaError_t cerr;

      // CUDAvysledek keeps its own device buffer. Pointing it at CUDAvelky.PData
      // would make the kernel overwrite the background in place and leave two
      // descriptors owning one allocation (one buffer leaked, one freed twice).

      // Grid creation with computed organization. The kernel handles one pixel of
      // the big picture per thread, so the grid has to cover CUDAvelky.Size.
      dim3 blok( block_size.x, block_size.y );
      dim3 mrizka( ( CUDAvelky.Size.x + block_size.x - 1 ) / block_size.x, ( CUDAvelky.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_vlozeni<<< mrizka, blok >>>( CUDAvelky, CUDAmaly, CUDAvysledek, pozice );

      // A kernel launch returns no status; launch errors are fetched explicitly.
      if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data from GPU device to PC
      size_t vysledekBytes = (size_t) vysledek.Size.x * vysledek.Size.y * sizeof( uchar4 );
      cerr = cudaMemcpy( vysledek.PData, CUDAvysledek.PData, vysledekBytes, cudaMemcpyDeviceToHost );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  }
  89.  
  // Releases the device buffers referenced by the three device pictures.
  //
  //   CUDAvelky, CUDAmaly, CUDAvysledek - descriptors whose PData is passed to cudaFree
  //
  // Nothing is returned; a failing cudaFree is reported with printf.
  void run_konecAnimace(CUDA_Pic CUDAvelky, CUDA_Pic CUDAmaly, CUDA_Pic CUDAvysledek) {
      cudaError_t cerr;

      cerr = cudaFree( CUDAvelky.PData );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaFree( CUDAmaly.PData );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // The result descriptor may share its buffer with one of the other two
      // pictures; a buffer that was already released above must not be freed again.
      if ( CUDAvysledek.PData != CUDAvelky.PData && CUDAvysledek.PData != CUDAmaly.PData ) {
          cerr = cudaFree( CUDAvysledek.PData );
          if ( cerr != cudaSuccess )
              printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
      }
  }
  96.  
  97.  
  98.  
  99.  
  100.  
  101.  
  102.  
  103.  
  104.  
  105.  
  106.  
  107.  
  108.  
  109.  
  110.  
  111.  
  112.  
  113.  
  114.  
  115.  
  116.  
  117.  
  118.  
  119.  
  120.  
  121.  
  122.  
  123.  
  124. /*__global__ void kernel_rozmazani( CUDA_Pic stary, CUDA_Pic novy) {
  125. // X,Y coordinates
  126. int y = blockDim.y * blockIdx.y + threadIdx.y + 1;
  127. int x = blockDim.x * blockIdx.x + threadIdx.x + 1;
  128. if ( x > stary.Size.x - 1 ) return;
  129. if ( y > stary.Size.y - 1 ) return;
  130.  
  131. // zmenseni
  132.  
  133.  
  134. // Store point [x,y] back to image
  135. novy.PData[ y * novy.Size.x + x ] = bgr;
  136.  
  137. }
  138.  
  139. void run_rozmazani(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
  140. cudaError_t cerr;
  141.  
  142. CUDA_Pic staryPic;
  143. staryPic.Size = stary.Size;
  144. CUDA_Pic novyPic;
  145. novyPic.Size = novy.Size;
  146.  
  147. // Memory allocation in GPU device
  148. cerr = cudaMalloc( &staryPic.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ) );
  149. if ( cerr != cudaSuccess )
  150. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  151.  
  152. cerr = cudaMalloc( &novyPic.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ) );
  153. if ( cerr != cudaSuccess )
  154. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  155.  
  156. // Copy data to GPU device
  157. cerr = cudaMemcpy( staryPic.PData, stary.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
  158. if ( cerr != cudaSuccess )
  159. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  160.  
  161. cerr = cudaMemcpy( novyPic.PData, novy.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
  162. if ( cerr != cudaSuccess )
  163. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  164.  
  165. // Grid creation with computed organization
  166. dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
  167. kernel_rozmazani<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic );
  168.  
  169. if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
  170. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  171.  
  172. // Copy data from GPU device to PC
  173. cerr = cudaMemcpy( stary.PData, staryPic.PData, stary.Size.x * stary.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
  174. if ( cerr != cudaSuccess )
  175. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  176.  
  177. cerr = cudaMemcpy( novy.PData, novyPic.PData, novy.Size.x * novy.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
  178. if ( cerr != cudaSuccess )
  179. printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
  180.  
  181. // Free memory
  182. cudaFree( staryPic.PData );
  183. cudaFree( novyPic.PData );
  184.  
  185. }*/
  186.  
  // Copies the small picture `maly` into the big picture `velky` in place, with
  // the top-left corner of `maly` landing at `pozice`. One thread per pixel of `maly`.
  //
  //   velky  - destination picture, modified in place
  //   maly   - picture to insert (read only here)
  //   pozice - coordinates in `velky` of the top-left corner of `maly`
  //
  // Launch: 2D grid of 2D blocks covering at least maly.Size threads. Pixels of
  // `maly` that would fall outside `velky` are skipped.
  // NOTE(review): the (int) casts assume picture dimensions fit in int - confirm
  // against the Size type declared in pic_type.h.
  __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, int2 pozice) {
      // X,Y coordinates inside the small picture
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= maly.Size.x ) return;
      if ( y >= maly.Size.y ) return;

      // Destination coordinates inside the big picture; anything outside of it is
      // clipped, otherwise the store would wrap into another row or leave the buffer.
      int cx = x + pozice.x;
      int cy = y + pozice.y;
      if ( cx < 0 || cy < 0 ) return;
      if ( cx >= (int) velky.Size.x || cy >= (int) velky.Size.y ) return;

      uchar4 bgr = maly.PData[ y * maly.Size.x + x ];

      // Store the point into the big picture
      velky.PData[ cy * velky.Size.x + cx ] = bgr;
  }
  202.  
  // Inserts the host picture `novy` into the host picture `stary` at `pozice`
  // using the GPU: both pictures are uploaded to freshly allocated device
  // buffers, the 3-argument kernel_vlozeni is launched, the modified big picture
  // is downloaded into stary.PData and the device buffers are released.
  //
  //   stary      - big picture; its pixels are overwritten with the result
  //   novy       - small picture to insert (left unchanged)
  //   block_size - threads per block in x and y
  //   pozice     - where `novy` is placed inside `stary`
  //
  // Nothing is returned; failing CUDA calls are reported with printf.
  void run_vlozeni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size, int2 pozice) {
      cudaError_t cerr;

      CUDA_Pic staryPic;
      staryPic.Size = stary.Size;
      CUDA_Pic novyPic;
      novyPic.Size = novy.Size;

      // Buffer sizes in bytes; widened to size_t before multiplying so that
      // width * height cannot wrap around in 32-bit arithmetic.
      size_t staryBytes = (size_t) staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 );
      size_t novyBytes = (size_t) novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 );

      // Memory allocation in GPU device
      cerr = cudaMalloc( &staryPic.PData, staryBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMalloc( &novyPic.PData, novyBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data to GPU device
      cerr = cudaMemcpy( staryPic.PData, stary.PData, staryBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      cerr = cudaMemcpy( novyPic.PData, novy.PData, novyBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Grid creation with computed organization. The kernel runs one thread per
      // pixel of the inserted picture, so the grid is derived from novyPic.Size.
      dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_vlozeni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic, pozice );

      if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data from GPU device to PC. Only the big picture is written by the
      // kernel; the small one is unchanged, so it is not transferred back.
      cerr = cudaMemcpy( stary.PData, staryPic.PData, staryBytes, cudaMemcpyDeviceToHost );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Free memory
      cudaFree( staryPic.PData );
      cudaFree( novyPic.PData );
  }
  250.  
  // Shrinks a picture by sampling: output pixel [x,y] of `novy` takes the value
  // of source pixel [2x+1, 2y+1] of `stary`. One thread per pixel of `novy`.
  //
  //   stary - source picture (read only here)
  //   novy  - destination picture
  //
  // Launch: 2D grid of 2D blocks covering at least novy.Size threads; surplus
  // threads exit. Sample coordinates that would leave `stary` are clamped to its
  // last column/row.
  // NOTE(review): the (int) casts assume picture dimensions fit in int - confirm
  // against the Size type declared in pic_type.h.
  __global__ void kernel_zmenseni( CUDA_Pic stary, CUDA_Pic novy) {
      // X,Y coordinates in the destination picture
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      // Threads address `novy`, so it is novy.Size that bounds them.
      if ( x >= novy.Size.x ) return;
      if ( y >= novy.Size.y ) return;

      // Source sample, clamped so that an odd-sized or too small source cannot be
      // read past its end.
      int sx = min( 2 * x + 1, (int) stary.Size.x - 1 );
      int sy = min( 2 * y + 1, (int) stary.Size.y - 1 );
      if ( sx < 0 || sy < 0 ) return;   // empty source picture, nothing to sample

      uchar4 bgr = stary.PData[ sy * stary.Size.x + sx ];

      // Store point [x,y] to the destination image
      novy.PData[ y * novy.Size.x + x ] = bgr;
  }
  265.  
  // Host wrapper around kernel_zmenseni: uploads both pictures to newly allocated
  // device buffers, launches the kernel on a grid derived from the size of `novy`,
  // downloads both device buffers back into stary.PData / novy.PData and frees
  // the device memory.
  //
  //   stary      - source host picture
  //   novy       - destination host picture
  //   block_size - threads per block in x and y
  //
  // Nothing is returned; failing CUDA calls are reported with printf.
  void run_zmenseni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
      cudaError_t status;

      // Device-side descriptors mirroring the two host pictures.
      CUDA_Pic srcDev;
      CUDA_Pic dstDev;
      srcDev.Size = stary.Size;
      dstDev.Size = novy.Size;

      // Buffer sizes in bytes.
      size_t srcBytes = srcDev.Size.x * srcDev.Size.y * sizeof( uchar4 );
      size_t dstBytes = dstDev.Size.x * dstDev.Size.y * sizeof( uchar4 );

      // Allocate the device buffers.
      status = cudaMalloc( &srcDev.PData, srcBytes );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }
      status = cudaMalloc( &dstDev.PData, dstBytes );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Upload host pixels.
      status = cudaMemcpy( srcDev.PData, stary.PData, srcBytes, cudaMemcpyHostToDevice );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }
      status = cudaMemcpy( dstDev.PData, novy.PData, dstBytes, cudaMemcpyHostToDevice );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Launch configuration: enough blocks to cover every pixel of the destination.
      dim3 threads( block_size.x, block_size.y );
      dim3 blocks( ( dstDev.Size.x + block_size.x - 1 ) / block_size.x, ( dstDev.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_zmenseni<<< blocks, threads >>>( srcDev, dstDev );

      status = cudaGetLastError();
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Download both pictures.
      status = cudaMemcpy( stary.PData, srcDev.PData, srcBytes, cudaMemcpyDeviceToHost );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }
      status = cudaMemcpy( novy.PData, dstDev.PData, dstBytes, cudaMemcpyDeviceToHost );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Release device memory.
      cudaFree( srcDev.PData );
      cudaFree( dstDev.PData );
  }
  313.  
  314.  
  315.  
  316.  
  317.  
  318.  
  319.  
  320.  
  321.  
  322.  
  323.  
  324.  
  325.  
  326.  
  327.  
  328.  
  329.  
  330.  
  331.  
  332.  
  333.  
  334.  
  335.  
  336.  
  337.  
  338.  
  339.  
  340.  
  341.  
  342.  
  343.  
  344.  
  345.  
  346.  
  347.  
  348.  
  349.  
  350.  
  // Mirrors a picture in place by swapping each pixel with its mirror partner.
  //
  //   cuda_pic - picture to flip, modified in place
  //   hor_ver  - 0: swap columns x and Size.x-1-x (left/right mirror);
  //              any other value: swap rows y and Size.y-1-y (top/bottom mirror)
  //
  // Launch: 2D grid of 2D blocks covering at least Size.x by Size.y threads.
  // Only the threads in the first half along the mirrored axis do a swap, so
  // every pair is exchanged exactly once and a middle column/row of an odd-sized
  // picture stays where it is.
  __global__ void kernel_flip( CUDA_Pic cuda_pic, int hor_ver) {
      // X,Y coordinates
      int y = blockDim.y * blockIdx.y + threadIdx.y;
      int x = blockDim.x * blockIdx.x + threadIdx.x;
      if ( x >= cuda_pic.Size.x ) return;
      if ( y >= cuda_pic.Size.y ) return;

      // Coordinates of the mirror partner. The last valid index is Size - 1,
      // hence the partner of index i is Size - 1 - i.
      int mx = x;
      int my = y;
      if ( hor_ver == 0 ) {
          if ( x >= cuda_pic.Size.x / 2 ) return;
          mx = cuda_pic.Size.x - 1 - x;
      }
      else {
          if ( y >= cuda_pic.Size.y / 2 ) return;
          my = cuda_pic.Size.y - 1 - y;
      }

      // Point [x,y] and its partner selected from the image
      uchar4 bgr = cuda_pic.PData[ y * cuda_pic.Size.x + x ];
      uchar4 bgr2 = cuda_pic.PData[ my * cuda_pic.Size.x + mx ];

      // Store both points back, exchanged
      cuda_pic.PData[ y * cuda_pic.Size.x + x ] = bgr2;
      cuda_pic.PData[ my * cuda_pic.Size.x + mx ] = bgr;
  }
  385.  
  // Host wrapper around kernel_flip: uploads the picture to a newly allocated
  // device buffer, launches the kernel, downloads the result into pic.PData and
  // frees the device buffer.
  //
  //   pic        - host picture; its pixels are overwritten with the flipped result
  //   block_size - threads per block in x and y
  //   flip       - forwarded to kernel_flip as its hor_ver argument
  //
  // Nothing is returned; failing CUDA calls are reported with printf.
  void run_flip( CUDA_Pic pic, uint2 block_size, int flip) {
      cudaError_t cerr;

      CUDA_Pic cudaPic;
      cudaPic.Size = pic.Size;

      // Buffer size in bytes; widened to size_t before multiplying so that
      // width * height cannot wrap around in 32-bit arithmetic.
      size_t picBytes = (size_t) cudaPic.Size.x * cudaPic.Size.y * sizeof( uchar4 );

      // Memory allocation in GPU device
      cerr = cudaMalloc( &cudaPic.PData, picBytes );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data to GPU device
      cerr = cudaMemcpy( cudaPic.PData, pic.PData, picBytes, cudaMemcpyHostToDevice );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Grid creation with computed organization
      dim3 mrizka( ( cudaPic.Size.x + block_size.x - 1 ) / block_size.x, ( cudaPic.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_flip<<< mrizka, dim3( block_size.x, block_size.y ) >>>( cudaPic, flip );

      // A kernel launch returns no status; launch errors are fetched explicitly.
      if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Copy data from GPU device to PC
      cerr = cudaMemcpy( pic.PData, cudaPic.PData, picBytes, cudaMemcpyDeviceToHost );
      if ( cerr != cudaSuccess )
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );

      // Free memory
      cudaFree( cudaPic.PData );
  }
  413.  
  414.  
  415.  
  416.  
  417.  
  418.  
  419.  
  420.  
  421.  
  422.  
  423.  
  424.  
  425.  
  426.  
  427.  
  428.  
  429.  
  430.  
  431.  
  432.  
  433.  
  434.  
  435.  
  436.  
  437.  
  438.  
  439.  
  440.  
  441.  
  // Every thread derives its pixel position from its block and thread indices and
  // rewrites that pixel in place. Threads whose position inside the block satisfies
  //     distX < halfW * ( halfH - distY ) / halfH
  // (distX, distY being the absolute distances of threadIdx from half the block
  // size) rotate the first three channels of the pixel: x <- y, y <- z, z <- x.
  // The fourth channel is kept as it is.
  // NOTE(review): blockDim.y / 2 is used as a divisor, so callers presumably launch
  // with blockDim.y >= 2 - confirm.
  __global__ void kernel_animation( CUDA_Pic cuda_pic )
  {
      // Pixel handled by this thread; threads outside the picture do nothing.
      int col = blockDim.x * blockIdx.x + threadIdx.x;
      int row = blockDim.y * blockIdx.y + threadIdx.y;
      if ( col >= cuda_pic.Size.x || row >= cuda_pic.Size.y ) return;

      // Load the pixel.
      uchar4 *pixel = &cuda_pic.PData[ row * cuda_pic.Size.x + col ];
      uchar4 color = *pixel;

      // Position of the thread relative to the middle of its block.
      int halfW = blockDim.x / 2;
      int halfH = blockDim.y / 2;
      int distX = __sad( halfW, threadIdx.x, 0 ); // |halfW - threadIdx.x|
      int distY = __sad( halfH, threadIdx.y, 0 ); // |halfH - threadIdx.y|

      // Rotate the colour channels for the selected threads.
      if ( distX < halfW * ( halfH - distY ) / halfH )
          color = make_uchar4( color.y, color.z, color.x, color.w );

      // Write the pixel back.
      *pixel = color;
  }
  472.  
  // Host wrapper around kernel_animation: uploads the picture to a newly
  // allocated device buffer, launches the kernel over the whole picture,
  // downloads the result into pic.PData and frees the device buffer.
  //
  //   pic        - host picture; its pixels are overwritten with the result
  //   block_size - threads per block in x and y
  //
  // Nothing is returned; failing CUDA calls are reported with printf.
  void run_animation( CUDA_Pic pic, uint2 block_size )
  {
      cudaError_t status;

      // Device-side descriptor with the same dimensions as the host picture.
      CUDA_Pic devPic;
      devPic.Size = pic.Size;

      // Buffer size in bytes.
      size_t picBytes = devPic.Size.x * devPic.Size.y * sizeof( uchar4 );

      // Allocate the device buffer.
      status = cudaMalloc( &devPic.PData, picBytes );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Upload host pixels.
      status = cudaMemcpy( devPic.PData, pic.PData, picBytes, cudaMemcpyHostToDevice );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Launch configuration: enough blocks to cover every pixel.
      dim3 threads( block_size.x, block_size.y );
      dim3 blocks( ( devPic.Size.x + block_size.x - 1 ) / block_size.x, ( devPic.Size.y + block_size.y - 1 ) / block_size.y );
      kernel_animation<<< blocks, threads >>>( devPic );

      status = cudaGetLastError();
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Download the result.
      status = cudaMemcpy( pic.PData, devPic.PData, picBytes, cudaMemcpyDeviceToHost );
      if ( status != cudaSuccess ) {
          printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( status ) );
      }

      // Release device memory.
      cudaFree( devPic.PData );

      // Device-side printf output would additionally need:
      //cudaDeviceSynchronize();
  }
Add Comment
Please, Sign In to add comment