Advertisement
Guest User

Untitled

a guest
Mar 21st, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.41 KB | None | 0 0
  1. #define _USE_MATH_DEFINES
  2. #include "iostream"
  3. #include <cmath>
  4. #include "cuda_runtime.h"
  5. #include "device_launch_parameters.h"
  6. #include <opencv2/opencv.hpp>
  7. #include <stdio.h>
  8. #include <string>
  9. #include <chrono>
  10.  
  11. using namespace std;
  12. using namespace cv;
  13.  
  // Image file paths. loadParams() assigns both; loadImage() reads
  // imageinputPath and main() passes imageOutputPath to imwrite().
  14. string imageinputPath;
  15. string imageOutputPath;
  16.  
  // Side length and sigma that main() passes to compGaussianMatrix2D();
  // gaussFilterSize is also passed to compGaussianMatrix1D().
  17. const int gaussFilterSize = 5;
  18. const double sigma = 1.5;
  19.  
  // Dimensions of the loaded image, filled in by main() from the Mat returned
  // by loadImage(). imageSizeInBytes is cols * rows * channels bytes.
  20. int imageCols;
  21. int imageRows;
  22. int imageChannels;
  23. size_t imageSizeInBytes;
  24.  
  // matrix: size x size weight table allocated and filled by
  // compGaussianMatrix2D().
  // matrix1D: flattened copy of matrix built by compGaussianMatrix1D(); main()
  // copies it to the device.
  25. double **matrix;
  26. double *matrix1D;
  27.  
  // Forward declarations of the functions defined below main().
  28. int loadParams(int argc, char** argv);
  29. Mat * loadImage();
  30. void compGaussianMatrix2D(int size, double sigma);
  31. void compGaussianMatrix1D(int size);
  // Device kernel launched from main() with a 2D grid of 2D blocks.
  32. __global__
  33. static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels);
  34.  
  // Prints `what` followed by the CUDA error description and terminates the
  // process when `err` is anything other than cudaSuccess.
  static void exitOnCudaError(cudaError_t err, const char *what)
  {
      if (err == cudaSuccess) {
          return;
      }
      cout << what << cudaGetErrorString(err) << endl;
      exit(EXIT_FAILURE);
  }

  // Program entry point: loads an image, filters it on the GPU with
  // performGaussParrallel2 and writes the result to imageOutputPath.
  //
  // argv[1] - input image path, argv[2] - output image path.
  // Returns 0 on success and EXIT_FAILURE when the image cannot be loaded or
  // saved; CUDA failures terminate the process through exitOnCudaError().
  int main(int argc, char **argv)
  {
      // loadParams() installs its built-in default paths before checking the
      // arguments, so a missing argument is reported but not treated as fatal.
      loadParams(argc, argv);

      Mat *inputImage = loadImage();
      if (inputImage == NULL) {
          return EXIT_FAILURE;
      }

      compGaussianMatrix2D(gaussFilterSize, sigma); // weight table
      compGaussianMatrix1D(gaussFilterSize);        // same table, flattened

      imageCols = inputImage->cols;
      imageRows = inputImage->rows;
      imageChannels = inputImage->channels();
      // Widen before multiplying so large images do not overflow int.
      imageSizeInBytes = (size_t)imageCols * imageRows * imageChannels * sizeof(uint8_t);

      // Device copy of the original image.
      uint8_t *cudaOrginalImageData = NULL;
      exitOnCudaError(cudaMalloc(&cudaOrginalImageData, imageSizeInBytes),
                      "Error while malloc cudaOrginalImageData \n");
      exitOnCudaError(cudaMemcpy(cudaOrginalImageData, inputImage->data, imageSizeInBytes, cudaMemcpyHostToDevice),
                      "Error while cudaMemcpy cudaOrginalImageData \n");

      // Device buffer for the result. It is zeroed so that any pixel the kernel
      // leaves untouched is black instead of uninitialised memory.
      uint8_t *cudaProcessedImageData = NULL;
      exitOnCudaError(cudaMalloc(&cudaProcessedImageData, imageSizeInBytes),
                      "Error while malloc cudaProcessedImageData \n");
      exitOnCudaError(cudaMemset(cudaProcessedImageData, 0, imageSizeInBytes),
                      "Error while cudaMemset cudaProcessedImageData \n");

      // Device copy of the flattened weight table.
      double *cudaMatrix = NULL;
      const size_t cudaMatrixSize = (size_t)gaussFilterSize * gaussFilterSize * sizeof(double);
      exitOnCudaError(cudaMalloc(&cudaMatrix, cudaMatrixSize),
                      "Error while cudaMalloc cudaMatrix \n");
      exitOnCudaError(cudaMemcpy(cudaMatrix, matrix1D, cudaMatrixSize, cudaMemcpyHostToDevice),
                      "Error while cudaMemcpy cudaMatrix \n");

      // 16x16 = 256 threads per block; the grid is rounded up so that it covers
      // the whole image (the kernel bounds-checks the overshoot).
      const int blockSizeInt = 16;
      const dim3 blockSize(blockSizeInt, blockSizeInt);
      const dim3 gridSize((imageCols + blockSizeInt - 1) / blockSizeInt,
                          (imageRows + blockSizeInt - 1) / blockSizeInt);

      const chrono::steady_clock::time_point start = chrono::steady_clock::now();

      performGaussParrallel2<<<gridSize, blockSize>>>(cudaOrginalImageData, cudaProcessedImageData, cudaMatrix, imageCols, imageRows, imageChannels);
      exitOnCudaError(cudaGetLastError(), "Error while calling performGaussParrallel2! \n");
      // Kernel launches are asynchronous: wait for completion so that the
      // measured time covers the kernel itself and execution errors surface here.
      exitOnCudaError(cudaDeviceSynchronize(), "Error while running performGaussParrallel2! \n");

      const chrono::steady_clock::time_point stop = chrono::steady_clock::now();
      const chrono::duration<double> elapsed = stop - start;

      // The Mat owns the host buffer for the result, so nothing has to be
      // freed by hand. A freshly constructed Mat stores its rows contiguously.
      Mat newImageMat(imageRows, imageCols, inputImage->type());
      exitOnCudaError(cudaMemcpy(newImageMat.data, cudaProcessedImageData, imageSizeInBytes, cudaMemcpyDeviceToHost),
                      "Error while cudaMemcpy processedimage \n");

      exitOnCudaError(cudaFree(cudaProcessedImageData), "Error while cudaFree cudaProcessedImageData \n");
      exitOnCudaError(cudaFree(cudaOrginalImageData), "Error while cudaFree cudaOrginalImageData \n");
      exitOnCudaError(cudaFree(cudaMatrix), "Error while cudaFree cudaMatrix \n");

      // imwrite() reports failure either through its return value or by throwing.
      bool saved = false;
      try {
          saved = imwrite(imageOutputPath, newImageMat);
      } catch (const exception &) {
          saved = false;
      }
      if (!saved) {
          cout << "Cant save image!";
          return EXIT_FAILURE;
      }

      cout << "Czas: " << elapsed.count();
      return 0;
  }
  116.  
  // Convolves an interleaved 8-bit image with the 5x5 weight table in
  // `cudaMatrix`.
  //
  // Launch layout: 2D grid of 2D blocks. Each thread produces the output pixel
  // at (blockIdx * blockDim + threadIdx); threads that fall outside the image
  // return immediately, so the grid may overshoot the image size.
  //
  // inputImage    - device pointer to imageRows * imageCols * imageChannels
  //                 bytes, row-major with interleaved channels.
  // outImgage     - device pointer to a buffer of the same size; every pixel of
  //                 it is written.
  // cudaMatrix    - device pointer to 25 weights, read as cudaMatrix[h * 5 + w].
  // imageChannels - number of interleaved channels per pixel (any count).
  //
  // The window is centred on the output pixel. Window coordinates that fall
  // outside the image are clamped to the nearest edge pixel.
  __global__
  static void performGaussParrallel2(uint8_t* inputImage, uint8_t* outImgage, double *cudaMatrix, int imageCols, int imageRows, int imageChannels) {
      const int filterSize = 5;
      const int radius = filterSize / 2;

      const int x = blockIdx.x * blockDim.x + threadIdx.x;
      const int y = blockIdx.y * blockDim.y + threadIdx.y;
      if (x >= imageCols || y >= imageRows) {
          return;
      }

      // size_t offsets: cols * rows * channels can exceed the range of int.
      const size_t outputPosition = ((size_t)imageCols * y + x) * imageChannels;

      for (int c = 0; c < imageChannels; c++) {
          double sum = 0.0;
          for (int h = 0; h < filterSize; h++) {
              int srcY = y + h - radius;
              if (srcY < 0) srcY = 0;
              if (srcY > imageRows - 1) srcY = imageRows - 1;

              for (int w = 0; w < filterSize; w++) {
                  int srcX = x + w - radius;
                  if (srcX < 0) srcX = 0;
                  if (srcX > imageCols - 1) srcX = imageCols - 1;

                  const size_t pixelPos = ((size_t)imageCols * srcY + srcX) * imageChannels;
                  sum += cudaMatrix[h * filterSize + w] * inputImage[pixelPos + c];
              }
          }

          // Round to nearest and keep the value inside the 8-bit range before
          // narrowing.
          double rounded = sum + 0.5;
          if (rounded < 0.0) rounded = 0.0;
          if (rounded > 255.0) rounded = 255.0;
          outImgage[outputPosition + c] = (uint8_t)rounded;
      }
  }

  // Builds the normalised size x size Gaussian weight table in the global
  // `matrix`.
  //
  // size  - side length of the table; for size <= 0 `matrix` is set to NULL.
  // sigma - standard deviation of the Gaussian, in pixels.
  //
  // Weights are evaluated at offsets from the table centre, so the table is
  // symmetric around its middle element, and are scaled to sum to 1. The
  // constant 1 / (2 * pi * sigma^2) factor is omitted because it cancels out in
  // that normalisation.
  //
  // The rows share one contiguous allocation (matrix[0]); a table built by an
  // earlier call is released first.
  void compGaussianMatrix2D(int size, double sigma) {
      if (matrix != NULL) {
          delete[] matrix[0];
          delete[] matrix;
          matrix = NULL;
      }
      if (size <= 0) {
          return;
      }

      matrix = new double*[size];
      matrix[0] = new double[(size_t)size * size];
      for (int i = 1; i < size; ++i) {
          matrix[i] = matrix[0] + (size_t)i * size;
      }

      const double center = (size - 1) / 2.0;
      const double twoSigmaSquared = 2 * sigma * sigma;

      double total = 0.;
      for (int i = 0; i < size; i++) {
          const double dy = i - center;
          for (int j = 0; j < size; j++) {
              const double dx = j - center;
              matrix[i][j] = exp(-(dx * dx + dy * dy) / twoSigmaSquared);
              total += matrix[i][j];
          }
      }

      for (int i = 0; i < size; i++) {
          for (int j = 0; j < size; j++) {
              matrix[i][j] = matrix[i][j] / total;
          }
      }
  }
  166.  
  // Flattens the global size x size table `matrix` into the global array
  // `matrix1D`, storing matrix[i][j] at index i + j * size.
  //
  // size - side length of `matrix`.
  //
  // A buffer left by an earlier call is released first. When there is nothing
  // to flatten (size <= 0 or `matrix` is NULL), `matrix1D` is left NULL.
  void compGaussianMatrix1D(int size) {
      delete[] matrix1D; // no-op while matrix1D is still NULL
      matrix1D = NULL;
      if (size <= 0 || matrix == NULL) {
          return;
      }

      matrix1D = new double[(size_t)size * size];
      for (int i = 0; i < size; i++) {
          for (int j = 0; j < size; j++) {
              matrix1D[i + j * size] = matrix[i][j];
          }
      }
  }
  176.  
  // Loads the input and output image paths from argv.
  //
  // Both paths are first set to built-in defaults ("2.jpg" / "out.jpg");
  // argv[1] then replaces the input path and argv[2] the output path.
  //
  // Returns 0 when both paths were supplied. Otherwise prints a message and
  // returns -1, leaving the default in place for every path that was missing.
  int loadParams(int argc, char** argv) {
      imageinputPath = "2.jpg";
      imageOutputPath = "out.jpg";

      // `< 2` rather than `== 1`: argv[1] must not be read when argc is 0.
      if (argc < 2) {
          cout << "Nie podano sciezek do pliku wejsciowego i wyjsciowego!";
          return -1;
      }
      imageinputPath = argv[1];

      if (argc < 3) {
          cout << "Nie podano sciezki wyjsciowej do pliku!";
          return -1;
      }
      imageOutputPath = argv[2];

      return 0;
  }
  199.  
  // Returns the image matrix read from imageinputPath as a 3-channel colour
  // image, or NULL (after printing a message) when the path is empty or the
  // file cannot be read. The returned pointer refers to a function-local
  // static Mat, so it stays valid after the call and is overwritten by the
  // next successful load.
  Mat * loadImage() {
      static Mat loadedImage;

      if (imageinputPath.empty()) {
          cout << "error! Invalid image path!\n";
          return NULL;
      }

      loadedImage = imread(imageinputPath, IMREAD_COLOR);
      if (loadedImage.empty()) {
          cout << "Cant load file! \n";
          return NULL;
      }

      return &loadedImage;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement