Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#define _USE_MATH_DEFINES
#include "iostream"
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <stdio.h>
#include <string>
#include <chrono>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <opencv2/opencv.hpp>
// Kernel launch configuration used by main: CUDA_KC blocks of CUDA_BS threads.
#define CUDA_KC 32
#define CUDA_BS 32

using namespace std;
using namespace cv;

// Input and output image paths; assigned by loadParams.
string imageinputPath, imageOutputPath;

// Side length of the square Gaussian filter and its standard deviation.
const int gaussFilterSize = 5;
const double sigma = 1.5;

// Dimensions of the loaded image and its total size in bytes; assigned in main.
int imageCols, imageRows, imageChannels;
size_t imageSizeInBytes;

// Gaussian weights: `matrix` as an array of rows (compGaussianMatrix2D) and
// `matrix1D` as the flattened copy uploaded to the device (compGaussianMatrix1D).
double **matrix;
double *matrix1D;

// Host helpers defined further down in this file.
int loadParams(int argc, char** argv);
Mat * loadImage();
void compGaussianMatrix2D(int size, double sigma);
void compGaussianMatrix1D(int size);

// Device kernel that applies the 5x5 filter; defined after main.
__global__
static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels);
// Terminates the program when `err` reports a failure, printing `what`
// followed by the CUDA runtime's description of the error.
static void exitOnCudaError(cudaError_t err, const char *what) {
    if (err != cudaSuccess) {
        cerr << what << cudaGetErrorString(err) << endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Program entry point: loads an image, blurs it on the GPU with the 5x5
 * Gaussian filter and writes the result to the output path.
 *
 * Usage: <program> <input image> <output image>
 *
 * Returns 0 on success and EXIT_FAILURE when the image cannot be loaded or
 * saved; any CUDA failure terminates the process with EXIT_FAILURE.
 * Prints the kernel execution time in seconds after "Czas: ".
 */
int main(int argc, char **argv)
{
    // loadParams assigns default paths before it inspects argv and reports a
    // missing argument itself, so the run continues with whatever it set.
    loadParams(argc, argv);

    // Read the image from disk.
    Mat *inputImage = loadImage();
    if (inputImage == NULL) {
        return EXIT_FAILURE;
    }
    uint8_t *hostOrginalImageData = (uint8_t*)inputImage->data;

    // Build the filter weights, first as a 2D matrix and then flattened.
    compGaussianMatrix2D(gaussFilterSize, sigma);
    compGaussianMatrix1D(gaussFilterSize);

    imageCols = inputImage->cols;
    imageRows = inputImage->rows;
    imageChannels = inputImage->channels();
    // Widen before multiplying so large images do not overflow int arithmetic.
    imageSizeInBytes = (size_t)imageCols * imageRows * imageChannels * sizeof(uint8_t);

    // The output Mat owns the host-side result buffer, so nothing has to be
    // allocated or released by hand for it.
    Mat newImageMat(imageRows, imageCols, inputImage->type());

    // Device buffer for the processed image.
    uint8_t *cudaProcessedImageData = NULL;
    exitOnCudaError(cudaMalloc(&cudaProcessedImageData, imageSizeInBytes),
                    "Error while malloc cudaProcessedImageData \n");

    // Device buffer for the original image, filled from the host copy.
    uint8_t *cudaOrginalImageData = NULL;
    exitOnCudaError(cudaMalloc(&cudaOrginalImageData, imageSizeInBytes),
                    "Error while malloc cudaOrginalImageData \n");
    exitOnCudaError(cudaMemcpy(cudaOrginalImageData, hostOrginalImageData, imageSizeInBytes, cudaMemcpyHostToDevice),
                    "Error while cudaMemcpy cudaOrginalImageData \n");

    // Device copy of the flattened weights; the size matches what
    // compGaussianMatrix1D allocated for matrix1D.
    double *cudaMatrix = NULL;
    size_t cudaMatrixSize = (size_t)gaussFilterSize * gaussFilterSize * sizeof(double);
    exitOnCudaError(cudaMalloc(&cudaMatrix, cudaMatrixSize),
                    "Error while cudaMalloc cudaMatrix \n");
    exitOnCudaError(cudaMemcpy(cudaMatrix, matrix1D, cudaMatrixSize, cudaMemcpyHostToDevice),
                    "Error while cudaMemcpy cudaMatrix \n");

    // Kernel launches return immediately, so the stop time is taken only after
    // the device has finished; otherwise just the launch call would be timed.
    chrono::steady_clock::time_point start = chrono::steady_clock::now();
    performGaussParrallel2<<<CUDA_KC, CUDA_BS>>>(cudaOrginalImageData, cudaProcessedImageData, cudaMatrix, imageCols, imageRows, imageChannels);
    // Launch-configuration errors are reported by cudaGetLastError, faults
    // during execution by the synchronizing call.
    exitOnCudaError(cudaGetLastError(), "Error while calling performGaussParrallel2! \n");
    exitOnCudaError(cudaDeviceSynchronize(), "Error while executing performGaussParrallel2! \n");
    chrono::steady_clock::time_point stop = chrono::steady_clock::now();
    chrono::duration<double> elapsed = stop - start;

    // Copy the result straight into the output Mat's buffer.
    exitOnCudaError(cudaMemcpy(newImageMat.data, cudaProcessedImageData, imageSizeInBytes, cudaMemcpyDeviceToHost),
                    "Error while cudaMemcpy processedimage \n");

    exitOnCudaError(cudaFree(cudaProcessedImageData), "Error while cudaFree cudaProcessedImageData \n");
    exitOnCudaError(cudaFree(cudaOrginalImageData), "Error while cudaFree cudaOrginalImageData \n");
    exitOnCudaError(cudaFree(cudaMatrix), "Error while cudaFree cudaMatrix \n");

    // imwrite signals failure either by throwing or by returning false.
    bool saved = false;
    try {
        saved = imwrite(imageOutputPath, newImageMat);
    } catch (const exception &) {
        saved = false;
    }
    if (!saved) {
        cout << "Cant save image!";
        return EXIT_FAILURE;
    }

    cout << "Czas: " << elapsed.count();
    return 0;
}
/**
 * Applies a 5x5 weighted filter to an interleaved 8-bit image.
 *
 * Launch layout: any 1D grid of 1D blocks. Pixels are distributed with a
 * grid-stride loop, so every pixel is processed exactly once whatever the
 * block and thread counts are. No shared memory is used.
 *
 * inputImage    device buffer of imageRows * imageCols * imageChannels bytes,
 *               row-major with interleaved channels
 * outImgage     device buffer of the same size; every byte is written
 * cudaMatrix    25 filter weights on the device; the weight for window row h
 *               and window column w is read from cudaMatrix[h + w * 5]
 * imageCols, imageRows, imageChannels   image dimensions
 *
 * The window for output pixel (row, col) covers input rows row..row+4 and
 * columns col..col+4, i.e. it is anchored at the output pixel rather than
 * centred on it. Coordinates past the bottom or right edge are clamped to the
 * last row/column, so the window never wraps into the next row and never
 * reads outside the image. Each result is rounded to the nearest integer and
 * limited to 0..255.
 */
__global__
static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels) {
    const int filterSize = 5;

    // Nothing to do for a degenerate image; this also keeps the divisions
    // below away from a zero divisor.
    if (imageCols <= 0 || imageRows <= 0 || imageChannels <= 0) {
        return;
    }

    const size_t pixelCount = (size_t)imageCols * (size_t)imageRows;
    const size_t totalThreads = (size_t)gridDim.x * blockDim.x;

    for (size_t pixel = (size_t)blockIdx.x * blockDim.x + threadIdx.x; pixel < pixelCount; pixel += totalThreads) {
        const int row = (int)(pixel / (size_t)imageCols);
        const int col = (int)(pixel % (size_t)imageCols);

        for (int channel = 0; channel < imageChannels; ++channel) {
            double sum = 0.0;
            for (int h = 0; h < filterSize; ++h) {
                int srcRow = row + h;
                if (srcRow > imageRows - 1) {
                    srcRow = imageRows - 1;
                }
                for (int w = 0; w < filterSize; ++w) {
                    int srcCol = col + w;
                    if (srcCol > imageCols - 1) {
                        srcCol = imageCols - 1;
                    }
                    const size_t srcIndex = ((size_t)srcRow * imageCols + srcCol) * imageChannels + channel;
                    sum += cudaMatrix[h + w * filterSize] * inputImage[srcIndex];
                }
            }

            // Round to nearest and saturate before narrowing to 8 bits; a bare
            // conversion would truncate and is undefined outside 0..255.
            sum += 0.5;
            if (sum < 0.0) {
                sum = 0.0;
            }
            if (sum > 255.0) {
                sum = 255.0;
            }
            outImgage[pixel * imageChannels + channel] = (uint8_t)sum;
        }
    }
}
/**
 * Allocates the global `matrix` as size x size doubles and fills it with
 * Gaussian weights that sum to 1.
 *
 * size   side length of the square matrix
 * sigma  standard deviation of the Gaussian
 *
 * Distances are measured from the centre of the matrix, (size - 1) / 2 in both
 * directions, so the weights are symmetric around the middle element.
 * Measuring them from element (0, 0) would yield only one quadrant of the
 * bell. The constant factor 1 / (2 * pi * sigma^2) is omitted because the
 * normalization below cancels it.
 *
 * Each call allocates new storage; a previously assigned `matrix` is not
 * released here.
 */
void compGaussianMatrix2D(int size, double sigma) {
    matrix = new double*[size];
    for (int i = 0; i < size; ++i) {
        matrix[i] = new double[size];
    }

    const double center = (size - 1) / 2.0;
    const double twoSigmaSquared = 2 * sigma * sigma;

    double total = 0.;
    for (int i = 0; i < size; i++) {
        const double dy = i - center;
        for (int j = 0; j < size; j++) {
            const double dx = j - center;
            matrix[i][j] = exp(-(dx * dx + dy * dy) / twoSigmaSquared);
            total += matrix[i][j];
        }
    }

    // Normalize so that filtering preserves overall brightness.
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            matrix[i][j] = matrix[i][j] / total;
        }
    }
}
/**
 * Allocates the global `matrix1D` with size * size doubles and copies the
 * global `matrix` into it, storing matrix[row][col] at index row + col * size.
 *
 * size   side length of the square `matrix`
 */
void compGaussianMatrix1D(int size) {
    matrix1D = new double[size*size];
    for (int col = 0; col < size; ++col) {
        // Each matrix column occupies `size` consecutive slots of the flat array.
        double *flatColumn = matrix1D + col * size;
        for (int row = 0; row < size; ++row) {
            flatColumn[row] = matrix[row][col];
        }
    }
}
/**
 * Loads the input and output image paths from the command line.
 *
 * Both paths are first set to the defaults "2.jpg" and "out.jpg"; argv[1]
 * then replaces the input path and argv[2] the output path when present.
 *
 * argc, argv   the arguments passed to main
 *
 * Returns 0 when both paths were given and -1 otherwise, after printing a
 * (Polish) message saying that the paths, or the output path, are missing.
 */
int loadParams(int argc, char** argv) {
    imageinputPath = "2.jpg";
    imageOutputPath = "out.jpg";

    // `< 2` rather than `== 1` so that an empty argv (argc == 0) is never indexed.
    if (argc < 2) {
        cout << "Nie podano sciezek do pliku wejsciowego i wyjsciowego!";
        return -1;
    }
    imageinputPath = argv[1];

    if (argc < 3) {
        cout << "Nie podano sciezki wyjsciowej do pliku!";
        return -1;
    }
    imageOutputPath = argv[2];

    return 0;
}
/**
 * Reads the image named by the global `imageinputPath` as a colour image.
 *
 * Returns a pointer to a function-local static Mat holding the image, or NULL
 * after printing a message when the path is empty or the file cannot be read.
 * The same Mat object is reused on every call.
 */
Mat * loadImage() {
    static Mat inputImage;

    if (imageinputPath.length() == 0) {
        cout << "error! Invalid image path!\n";
        return NULL;
    }

    inputImage = imread(imageinputPath, IMREAD_COLOR);
    if (inputImage.empty()) {
        cout << "Cant load file! \n";
        return NULL;
    }

    return &inputImage;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement