Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // ***********************************************************************
- //
- // Demo program for education in subject
- // Computer Architectures and Paralel Systems.
- // Petr Olivka, dep. of Computer Science, FEI, VSB-TU Ostrava
- // email:petr.olivka@vsb.cz
- //
- // Example of CUDA Technology Usage.
- // Manipulation with prepared image.
- //
- // ***********************************************************************
- #include <cuda_runtime.h>
- #include <device_launch_parameters.h>
- #include <stdio.h>
- #include "pic_type.h"
- __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, CUDA_Pic vysledek, int2 pozice) {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x >= velky.Size.x ) return;
- if ( y >= velky.Size.y ) return;
- uchar4 bgr;
- if(x >= pozice.x && x <= pozice.x + maly.Size.x && y >= pozice.y && y <= pozice.y + maly.Size.y )
- bgr = maly.PData[ ( y - pozice.y ) * maly.Size.x + ( x - pozice.x ) ];
- else
- bgr = velky.PData[ y * velky.Size.x + x ];
- // Store point [x,y] back to image
- vysledek.PData[ (y) * vysledek.Size.x + (x) ] = bgr;
- }
- void run_startAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek) {
- cudaError_t cerr;
- CUDAvelky.Size = velky.Size;
- CUDAmaly.Size = maly.Size;
- CUDAvysledek.Size = vysledek.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &CUDAvelky.PData, CUDAvelky.Size.x * CUDAvelky.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMalloc( &CUDAmaly.PData, CUDAmaly.Size.x * CUDAmaly.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMalloc( &CUDAvysledek.PData, CUDAvysledek.Size.x * CUDAvysledek.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( CUDAvelky.PData, velky.PData, CUDAvelky.Size.x * CUDAvelky.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( CUDAmaly.PData, maly.PData, CUDAmaly.Size.x * CUDAmaly.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( CUDAvysledek.PData, vysledek.PData, CUDAvysledek.Size.x * CUDAvysledek.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- }
- void run_prubehAnimace(CUDA_Pic velky, CUDA_Pic &CUDAvelky, CUDA_Pic maly, CUDA_Pic &CUDAmaly, CUDA_Pic vysledek, CUDA_Pic &CUDAvysledek, uint2 block_size, int2 pozice) {
- // Grid creation with computed organization
- CUDAvysledek.PData = CUDAvelky.PData;
- dim3 mrizka( ( CUDAmaly.Size.x + block_size.x - 1 ) / block_size.x, ( CUDAmaly.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_vlozeni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( CUDAvelky, CUDAmaly, CUDAvysledek, pozice);
- cudaError_t cerr;
- // Copy data from GPU device to PC
- cerr = cudaMemcpy( vysledek.PData, CUDAvysledek.PData, vysledek.Size.x * vysledek.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- }
- void run_konecAnimace(CUDA_Pic CUDAvelky, CUDA_Pic CUDAmaly, CUDA_Pic CUDAvysledek) {
- cudaFree( CUDAvelky.PData );
- cudaFree( CUDAmaly.PData );
- cudaFree( CUDAvysledek.PData );
- }
- /*__global__ void kernel_rozmazani( CUDA_Pic stary, CUDA_Pic novy) {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y + 1;
- int x = blockDim.x * blockIdx.x + threadIdx.x + 1;
- if ( x > stary.Size.x - 1 ) return;
- if ( y > stary.Size.y - 1 ) return;
- // zmenseni
- // Store point [x,y] back to image
- novy.PData[ y * novy.Size.x + x ] = bgr;
- }
- void run_rozmazani(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
- cudaError_t cerr;
- CUDA_Pic staryPic;
- staryPic.Size = stary.Size;
- CUDA_Pic novyPic;
- novyPic.Size = novy.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &staryPic.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMalloc( &novyPic.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( staryPic.PData, stary.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novyPic.PData, novy.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Grid creation with computed organization
- dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_rozmazani<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data from GPU device to PC
- cerr = cudaMemcpy( stary.PData, staryPic.PData, stary.Size.x * stary.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novy.PData, novyPic.PData, novy.Size.x * novy.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Free memory
- cudaFree( staryPic.PData );
- cudaFree( novyPic.PData );
- }*/
- __global__ void kernel_vlozeni( CUDA_Pic velky, CUDA_Pic maly, int2 pozice) {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x >= maly.Size.x ) return;
- if ( y >= maly.Size.y ) return;
- // zmenseni
- uchar4 bgr = maly.PData[ y * maly.Size.x + x ];
- // Store point [x,y] back to image
- velky.PData[ (y + pozice.y) * velky.Size.x + (x + pozice.x) ] = bgr;
- }
- void run_vlozeni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size, int2 pozice) {
- cudaError_t cerr;
- CUDA_Pic staryPic;
- staryPic.Size = stary.Size;
- CUDA_Pic novyPic;
- novyPic.Size = novy.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &staryPic.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMalloc( &novyPic.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( staryPic.PData, stary.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novyPic.PData, novy.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Grid creation with computed organization
- dim3 mrizka( ( staryPic.Size.x + block_size.x - 1 ) / block_size.x, ( staryPic.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_vlozeni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic, pozice );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data from GPU device to PC
- cerr = cudaMemcpy( stary.PData, staryPic.PData, stary.Size.x * stary.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novy.PData, novyPic.PData, novy.Size.x * novy.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Free memory
- cudaFree( staryPic.PData );
- cudaFree( novyPic.PData );
- }
- __global__ void kernel_zmenseni( CUDA_Pic stary, CUDA_Pic novy) {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x > stary.Size.x ) return;
- if ( y > stary.Size.y ) return;
- // zmenseni
- uchar4 bgr = stary.PData[ ((2 * y) + 1) * stary.Size.x + ((2 * x) + 1) ];
- // Store point [x,y] back to image
- novy.PData[ y * novy.Size.x + x ] = bgr;
- }
- void run_zmenseni(CUDA_Pic stary, CUDA_Pic novy, uint2 block_size) {
- cudaError_t cerr;
- CUDA_Pic staryPic;
- staryPic.Size = stary.Size;
- CUDA_Pic novyPic;
- novyPic.Size = novy.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &staryPic.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMalloc( &novyPic.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( staryPic.PData, stary.PData, staryPic.Size.x * staryPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novyPic.PData, novy.PData, novyPic.Size.x * novyPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Grid creation with computed organization
- dim3 mrizka( ( novyPic.Size.x + block_size.x - 1 ) / block_size.x, ( novyPic.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_zmenseni<<< mrizka, dim3( block_size.x, block_size.y ) >>>( staryPic, novyPic );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data from GPU device to PC
- cerr = cudaMemcpy( stary.PData, staryPic.PData, stary.Size.x * stary.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cerr = cudaMemcpy( novy.PData, novyPic.PData, novy.Size.x * novy.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Free memory
- cudaFree( staryPic.PData );
- cudaFree( novyPic.PData );
- }
- __global__ void kernel_flip( CUDA_Pic cuda_pic, int hor_ver) {
- if(hor_ver == 0) {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x >= cuda_pic.Size.x/2 ) return;
- if ( y >= cuda_pic.Size.y ) return;
- // Point [x,y] selection from image
- uchar4 bgr = cuda_pic.PData[ y * cuda_pic.Size.x + x ];
- uchar4 bgr2 = cuda_pic.PData[ (y) * cuda_pic.Size.x + (cuda_pic.Size.x - x) ];
- // Store point [x,y] back to image
- cuda_pic.PData[ y * cuda_pic.Size.x + x ] = bgr2;
- cuda_pic.PData[ (y) * cuda_pic.Size.x + (cuda_pic.Size.x - x) ] = bgr;
- }
- else
- {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x >= cuda_pic.Size.x ) return;
- if ( y >= cuda_pic.Size.y/2 ) return;
- // Point [x,y] selection from image
- uchar4 bgr = cuda_pic.PData[ y * cuda_pic.Size.x + x ];
- uchar4 bgr2 = cuda_pic.PData[ (cuda_pic.Size.y - y) * cuda_pic.Size.x + (x) ];
- // Store point [x,y] back to image
- cuda_pic.PData[ y * cuda_pic.Size.x + x ] = bgr2;
- cuda_pic.PData[ (cuda_pic.Size.y - y) * cuda_pic.Size.x + (x) ] = bgr;
- }
- }
- void run_flip( CUDA_Pic pic, uint2 block_size, int flip) {
- cudaError_t cerr;
- CUDA_Pic cudaPic;
- cudaPic.Size = pic.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &cudaPic.PData, cudaPic.Size.x * cudaPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( cudaPic.PData, pic.PData, cudaPic.Size.x * cudaPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Grid creation with computed organization
- dim3 mrizka( ( cudaPic.Size.x + block_size.x - 1 ) / block_size.x, ( cudaPic.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_flip<<< mrizka, dim3( block_size.x, block_size.y ) >>>( cudaPic, flip );
- // Copy data to CPU device
- cerr = cudaMemcpy( pic.PData, cudaPic.PData, pic.Size.x * pic.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- cudaFree( cudaPic.PData );
- }
- // Every threads identifies its position in grid and in block and modify image
- __global__ void kernel_animation( CUDA_Pic cuda_pic )
- {
- // X,Y coordinates
- int y = blockDim.y * blockIdx.y + threadIdx.y;
- int x = blockDim.x * blockIdx.x + threadIdx.x;
- if ( x >= cuda_pic.Size.x ) return;
- if ( y >= cuda_pic.Size.y ) return;
- // Point [x,y] selection from image
- uchar4 bgr = cuda_pic.PData[ y * cuda_pic.Size.x + x ];
- // Color rotation inside block
- int x2 = blockDim.x / 2;
- int y2 = blockDim.y / 2;
- int px = __sad( x2, threadIdx.x, 0 ); // abs function
- int py = __sad( y2, threadIdx.y, 0 );
- if ( px < x2 * ( y2 - py ) / y2 )
- {
- uchar4 tmp = bgr;
- bgr.x = tmp.y;
- bgr.y = tmp.z;
- bgr.z = tmp.x;
- }
- // Store point [x,y] back to image
- cuda_pic.PData[ y * cuda_pic.Size.x + x ] = bgr;
- }
- void run_animation( CUDA_Pic pic, uint2 block_size )
- {
- cudaError_t cerr;
- CUDA_Pic cudaPic;
- cudaPic.Size = pic.Size;
- // Memory allocation in GPU device
- cerr = cudaMalloc( &cudaPic.PData, cudaPic.Size.x * cudaPic.Size.y * sizeof( uchar4 ) );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data to GPU device
- cerr = cudaMemcpy( cudaPic.PData, pic.PData, cudaPic.Size.x * cudaPic.Size.y * sizeof( uchar4 ), cudaMemcpyHostToDevice );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Grid creation with computed organization
- dim3 mrizka( ( cudaPic.Size.x + block_size.x - 1 ) / block_size.x, ( cudaPic.Size.y + block_size.y - 1 ) / block_size.y );
- kernel_animation<<< mrizka, dim3( block_size.x, block_size.y ) >>>( cudaPic );
- if ( ( cerr = cudaGetLastError() ) != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Copy data from GPU device to PC
- cerr = cudaMemcpy( pic.PData, cudaPic.PData, pic.Size.x * pic.Size.y * sizeof( uchar4 ), cudaMemcpyDeviceToHost );
- if ( cerr != cudaSuccess )
- printf( "CUDA Error [%d] - '%s'\n", __LINE__, cudaGetErrorString( cerr ) );
- // Free memory
- cudaFree( cudaPic.PData );
- // For printf
- //cudaDeviceSynchronize();
- }
Add Comment
Please, Sign In to add comment