Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define _USE_MATH_DEFINES
- #include "iostream"
- #include <cmath>
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <opencv2/opencv.hpp>
- #include <stdio.h>
- #include <string>
- #include <chrono>
- using namespace std;
- using namespace cv;
// ---- Forward declarations -------------------------------------------------
int loadParams(int argc, char** argv);
Mat * loadImage();
void compGaussianMatrix2D(int size, double sigma);
void compGaussianMatrix1D(int size);
__global__
static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels);

// ---- Paths (filled in by loadParams) --------------------------------------
string imageinputPath;    // file handed to imread() in loadImage()
string imageOutputPath;   // file handed to imwrite() in main()

// ---- Filter configuration -------------------------------------------------
const int gaussFilterSize = 5;   // side length passed to the matrix builders
const double sigma = 1.5;        // standard deviation passed to compGaussianMatrix2D()

// ---- Image geometry (assigned in main once the image is loaded) -----------
int imageCols;
int imageRows;
int imageChannels;
size_t imageSizeInBytes;

// ---- Host-side filter weights ---------------------------------------------
double **matrix = nullptr;     // 2D table allocated by compGaussianMatrix2D()
double *matrix1D = nullptr;    // flattened copy allocated by compGaussianMatrix1D()
// Terminates the program with a diagnostic when a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        cerr << what << ": " << cudaGetErrorString(status) << "\n";
        exit(EXIT_FAILURE);
    }
}

/**
 * Entry point: loads an image, blurs it on the GPU with a 5x5 filter and
 * writes the result to the output path.
 *
 * Usage: <program> <input image> <output image>
 *
 * Returns 0 on success and EXIT_FAILURE when the image cannot be loaded or
 * saved. CUDA failures terminate the process through checkCuda().
 */
int main(int argc, char **argv)
{
    // loadParams() installs default paths before parsing argv, so a non-zero
    // result only means that one or both defaults stay in effect.
    loadParams(argc, argv);

    Mat *inputImage = loadImage();
    if (inputImage == NULL) {
        return EXIT_FAILURE;
    }

    // Host-side source pixels. The size computation below treats the buffer
    // as one contiguous rows*cols*channels block.
    uint8_t *hostOrginalImageData = (uint8_t*)inputImage->data;

    compGaussianMatrix2D(gaussFilterSize, sigma);   // builds the 2D weight table
    compGaussianMatrix1D(gaussFilterSize);          // flattens it for upload

    imageCols = inputImage->cols;
    imageRows = inputImage->rows;
    imageChannels = inputImage->channels();
    // Widen before multiplying so large images cannot overflow int.
    imageSizeInBytes = (size_t)imageCols * imageRows * imageChannels * sizeof(uint8_t);

    // Device buffers: source image, destination image and filter weights.
    uint8_t *cudaOrginalImageData = NULL;
    uint8_t *cudaProcessedImageData = NULL;
    double *cudaMatrix = NULL;
    const size_t cudaMatrixSize = (size_t)gaussFilterSize * gaussFilterSize * sizeof(double);

    checkCuda(cudaMalloc(&cudaProcessedImageData, imageSizeInBytes),
              "Error while malloc cudaProcessedImageData");
    // cudaMalloc does not clear memory; zero the destination so any pixel the
    // kernel leaves untouched is black instead of arbitrary data.
    checkCuda(cudaMemset(cudaProcessedImageData, 0, imageSizeInBytes),
              "Error while cudaMemset cudaProcessedImageData");

    checkCuda(cudaMalloc(&cudaOrginalImageData, imageSizeInBytes),
              "Error while malloc cudaOrginalImageData");
    checkCuda(cudaMemcpy(cudaOrginalImageData, hostOrginalImageData, imageSizeInBytes, cudaMemcpyHostToDevice),
              "Error while cudaMemcpy cudaOrginalImageData");

    checkCuda(cudaMalloc(&cudaMatrix, cudaMatrixSize),
              "Error while cudaMalloc cudaMatrix");
    checkCuda(cudaMemcpy(cudaMatrix, matrix1D, cudaMatrixSize, cudaMemcpyHostToDevice),
              "Error while cudaMemcpy cudaMatrix");

    // One thread per pixel. 16x16 = 256 threads per block; the grid is
    // rounded up so the whole image is covered.
    const int blockSizeInt = 16;
    const dim3 blockSize(blockSizeInt, blockSizeInt);
    const dim3 gridSize((imageCols + blockSizeInt - 1) / blockSizeInt,
                        (imageRows + blockSizeInt - 1) / blockSizeInt);

    const chrono::steady_clock::time_point start = chrono::steady_clock::now();
    performGaussParrallel2<<<gridSize, blockSize>>>(cudaOrginalImageData, cudaProcessedImageData, cudaMatrix, imageCols, imageRows, imageChannels);
    checkCuda(cudaGetLastError(), "Error while calling performGaussParrallel2!");
    // Kernel launches are asynchronous: wait for completion so the timer
    // covers the kernel's execution and so execution errors surface here.
    checkCuda(cudaDeviceSynchronize(), "Error while executing performGaussParrallel2!");
    const chrono::steady_clock::time_point stop = chrono::steady_clock::now();
    const chrono::duration<double> elapsed = stop - start;

    // Copy the result straight into a Mat that owns its own buffer, so no
    // separate host allocation has to be tracked or freed.
    Mat newImageMat(imageRows, imageCols, inputImage->type());
    checkCuda(cudaMemcpy(newImageMat.data, cudaProcessedImageData, imageSizeInBytes, cudaMemcpyDeviceToHost),
              "Error while cudaMemcpy processedimage");

    checkCuda(cudaFree(cudaProcessedImageData), "Error while cudaFree cudaProcessedImageData");
    checkCuda(cudaFree(cudaOrginalImageData), "Error while cudaFree cudaOrginalImageData");
    checkCuda(cudaFree(cudaMatrix), "Error while cudaFree cudaMatrix");

    // imwrite reports failure either by returning false or by throwing.
    bool saved = false;
    try {
        saved = imwrite(imageOutputPath, newImageMat);
    } catch (const exception &) {
        saved = false;
    }
    if (!saved) {
        cout << "Cant save image!";
        return EXIT_FAILURE;
    }

    cout << "Czas: " << elapsed.count();
    return 0;
}
// Limits an index to the closed range [0, upper].
__device__ static int clampToRange(int value, int upper) {
    if (value < 0) {
        return 0;
    }
    return value > upper ? upper : value;
}

/**
 * Convolves an interleaved 8-bit image with a 5x5 weight matrix.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; the grid must
 * cover at least imageCols x imageRows threads (surplus threads exit early).
 * No shared memory is used.
 *
 * inputImage    - device pointer, imageRows * imageCols * imageChannels bytes,
 *                 indexed as (row * imageCols + col) * imageChannels + channel.
 * outImgage     - device pointer with the same size and layout; every pixel
 *                 of every channel is written.
 * cudaMatrix    - device pointer to 25 weights read as [h * 5 + w]; element
 *                 [2 * 5 + 2] is the tap aligned with the output pixel.
 * imageCols, imageRows, imageChannels - image geometry.
 *
 * Source coordinates that fall outside the image are clamped to the nearest
 * edge pixel, so border pixels are filtered instead of being skipped.
 */
__global__
static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels) {
    const int kFilterSize = 5;
    const int kRadius = kFilterSize / 2;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= imageCols || y >= imageRows) {
        return;   // thread lies in the padding of the rounded-up grid
    }

    // Offsets are computed in size_t so large images cannot overflow int.
    const size_t outputPosition = ((size_t)y * imageCols + x) * imageChannels;

    for (int c = 0; c < imageChannels; ++c) {
        double acc = 0.0;
        for (int h = 0; h < kFilterSize; ++h) {
            const int srcY = clampToRange(y + h - kRadius, imageRows - 1);
            for (int w = 0; w < kFilterSize; ++w) {
                const int srcX = clampToRange(x + w - kRadius, imageCols - 1);
                const size_t pixelPos = ((size_t)srcY * imageCols + srcX) * imageChannels + c;
                acc += cudaMatrix[h * kFilterSize + w] * inputImage[pixelPos];
            }
        }

        // Round to nearest and saturate to the 8-bit range before narrowing;
        // a plain cast would truncate and wrap out-of-range sums.
        double rounded = acc + 0.5;
        if (rounded < 0.0) {
            rounded = 0.0;
        }
        if (rounded > 255.0) {
            rounded = 255.0;
        }
        outImgage[outputPosition + c] = (uint8_t)rounded;
    }
}
/**
 * Allocates the global `matrix` as a size x size table and fills it with
 * normalised Gaussian weights.
 *
 * The Gaussian is centred on the middle of the table: the distance of cell
 * [i][j] is measured from index (size - 1) / 2 in both directions, so for
 * size 5 the peak is matrix[2][2] and the table is symmetric around it.
 * The weights are scaled so that they sum to 1.
 *
 * size  - side length of the table; must be positive.
 * sigma - standard deviation of the Gaussian; must be non-zero.
 *
 * Each call allocates a fresh table and overwrites the global pointer; a
 * previously allocated table is not released.
 */
void compGaussianMatrix2D(int size, double sigma) {
    matrix = new double*[size];
    for (int i = 0; i < size; ++i) {
        matrix[i] = new double[size];
    }

    const double center = (size - 1) / 2.0;
    const double twoSigmaSq = 2.0 * sigma * sigma;

    // The constant factor 1 / (2 * pi * sigma^2) of the Gaussian is omitted:
    // it cancels out in the normalisation below.
    double total = 0.0;
    for (int i = 0; i < size; ++i) {
        const double dy = i - center;
        for (int j = 0; j < size; ++j) {
            const double dx = j - center;
            const double weight = exp(-(dx * dx + dy * dy) / twoSigmaSq);
            matrix[i][j] = weight;
            total += weight;
        }
    }

    // Normalise so that the filter preserves overall brightness.
    for (int i = 0; i < size; ++i) {
        for (int j = 0; j < size; ++j) {
            matrix[i][j] /= total;
        }
    }
}
/**
 * Allocates the global `matrix1D` (size * size doubles) and copies the global
 * 2D `matrix` into it.
 *
 * Layout: matrix1D[i + j * size] holds matrix[i][j], i.e. the second index of
 * `matrix` selects the contiguous run of `size` elements in the flat array.
 *
 * size - side length of `matrix`.
 */
void compGaussianMatrix1D(int size) {
    matrix1D = new double[size*size];
    for (int col = 0; col < size; ++col) {
        double *run = matrix1D + col * size;   // start of the run for this second index
        for (int row = 0; row < size; ++row) {
            run[row] = matrix[row][col];
        }
    }
}
// Loads the input and output paths from argv.
//
// Both globals are first set to their defaults ("2.jpg" / "out.jpg"); each
// argument that is present then overrides its default.
// Returns 0 when both paths were supplied, -1 (after printing a message) when
// the input path or the output path is missing.
int loadParams(int argc, char** argv) {
    imageinputPath = "2.jpg";
    imageOutputPath = "out.jpg";

    // No arguments at all: keep both defaults.
    if (argc == 1) {
        cout << "Nie podano sciezek do pliku wejsciowego i wyjsciowego!";
        return -1;
    }
    imageinputPath = argv[1];

    // Only the input path was given: keep the default output path.
    if (argc == 2) {
        cout << "Nie podano sciezki wyjsciowej do pliku!";
        return -1;
    }
    imageOutputPath = argv[2];

    return 0;
}
// Returns the image matrix read from imageinputPath.
//
// The image is decoded with IMREAD_COLOR into a function-local static Mat and
// a pointer to that object is returned, so the caller must not free it.
// Returns NULL (after printing a message) when the path is empty or the file
// cannot be read.
Mat * loadImage() {
    static Mat inputImage;

    if (imageinputPath.empty()) {
        cout << "error! Invalid image path!\n";
        return NULL;
    }

    inputImage = imread(imageinputPath, IMREAD_COLOR);
    if (inputImage.empty()) {
        cout << "Cant load file! \n";
        return NULL;
    }

    return &inputImage;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement