Advertisement
Guest User

Untitled

a guest
Apr 20th, 2019
236
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 10.53 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include "cuda_runtime.h"
  6. #include "device_launch_parameters.h"
  7. #include <cuda.h>
  8. #include <device_functions.h>
  9. #include <cuda_runtime_api.h>
  10.  
  11. #include <stdio.h>
  12. #include <cstdlib>
  13. #include <iostream>
  14. #include <ostream>
  15. #include <iomanip>
  16. #include <windows.h>
  17. #include <conio.h>
  18. #include <cstdint>
  19. #include <chrono>
  20.  
  21. #include <npp.h>
  22.  
  23. #include "../common/inc/helper_cuda.h"
  24. #include "../common/inc/helper_image.h"
  25.  
  26. using namespace std;
  27.  
  // ---- Host utilities -------------------------------------------------------

  // Streams an N x M byte matrix to stdout as characters, one row per line.
  void showImg(unsigned char* matrix, int N, int M);
  // Returns a new[]-allocated copy of img surrounded by a one-pixel border.
  unsigned char* rebuildImg(unsigned char* img, int h, int w);
  // Prints how many of the first h*w bytes differ between the two buffers.
  void compose(unsigned char* matrix1, unsigned char* matrix2,int h,int w);
  // Allocates a 30000*10000-byte buffer filled with pseudo-random values.
  unsigned char* creat300mpx();

  // ---- CPU reference path ---------------------------------------------------

  // Filters the padded image img into newImg (h x w), prints the elapsed time
  // and returns newImg.
  unsigned char* CPU(unsigned char* img, unsigned char* newImg, int h, int w);
  // Computes one output pixel (i, j) from the padded image with a 3x3 filter.
  unsigned char filterCPU(unsigned char* img, int i, int j, int w, int filter[3][3], int divisionCoef);

  // ---- GPU paths --------------------------------------------------------------

  // Kernels: img_origin is the padded input, img_new the h x w output.
  __global__ void GPUfunc(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif);
  __global__ void GPUfuncShared(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif);
  // Host launchers: copy the padded image to the device, run the matching
  // kernel, print the elapsed time and return a new[]-allocated h x w result.
  unsigned char* GPU(unsigned char* img, int h, int w);
  unsigned char* GPUShared(unsigned char* img, int h, int w);
  38.  
  39.  
  40.  
  /*
   * Applies the fixed 3x3 filter {1,-2,1 / -2,5,-2 / 1,-2,1} to a padded image,
   * one thread per output pixel, reading straight from global memory.
   *
   * img_origin : device buffer holding the padded input, (h + 2) rows of
   *              (w + 2) bytes, row-major.
   * img_new    : device buffer receiving the h x w output, row-major.
   * w, h       : width and height of the OUTPUT image.
   * dif        : integer divisor applied to the weighted sum (must be non-zero).
   *
   * Launch layout: the x dimension of the grid/block walks image ROWS and the
   * y dimension walks COLUMNS (the host launchers size grid.x from h and
   * grid.y from w). The grid may be rounded up past the image; surplus
   * threads exit without touching memory.
   *
   * The quotient is converted to unsigned char without clamping, so values
   * outside 0..255 wrap; this mirrors the CPU reference implementation.
   */
  __global__ void GPUfunc(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif) {
      const int row = blockIdx.x * blockDim.x + threadIdx.x;
      const int col = blockIdx.y * blockDim.y + threadIdx.y;

      // Threads in the rounded-up tail of the grid have no pixel to produce.
      if (row >= h || col >= w)
          return;

      const int filter[3][3] = {
          { 1, -2, 1 }, { -2, 5, -2 }, { 1, -2, 1 }
      };

      // Row stride of the padded input; size_t keeps offsets from overflowing int.
      const size_t stride = (size_t)w + 2;

      int sum = 0;
      for (int dr = 0; dr < 3; ++dr) {
          const size_t rowBase = (size_t)(row + dr) * stride + (size_t)col;
          for (int dc = 0; dc < 3; ++dc)
              sum += img_origin[rowBase + dc] * filter[dr][dc];
      }

      img_new[(size_t)row * w + col] = (unsigned char)(sum / dif);
  }
  60.  
  /*
   * Shared-memory variant of the 3x3 filter {1,-2,1 / -2,5,-2 / 1,-2,1}.
   *
   * Each block first stages the (blockDim.x + 2) x (blockDim.y + 2) patch of
   * the padded input it needs (its pixels plus a one-pixel halo) in shared
   * memory, synchronizes, and then every thread computes its pixel from the
   * staged patch.
   *
   * img_origin : device buffer holding the padded input, (h + 2) rows of
   *              (w + 2) bytes, row-major.
   * img_new    : device buffer receiving the h x w output, row-major.
   * w, h       : width and height of the OUTPUT image.
   * dif        : integer divisor applied to the weighted sum (must be non-zero).
   *
   * Launch layout: x walks image ROWS, y walks COLUMNS (the host launcher
   * sizes grid.x from h and grid.y from w); blockDim.z is expected to be 1.
   * Static shared memory: 34 * 34 bytes. Blocks with an edge longer than 32
   * threads do not fit the tile and fall back to reading global memory.
   *
   * The quotient is converted to unsigned char without clamping, so values
   * outside 0..255 wrap; this mirrors the CPU reference implementation.
   */
  __global__ void GPUfuncShared(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif) {
      enum { MAX_BLOCK_EDGE = 32 };
      __shared__ unsigned char tile[MAX_BLOCK_EDGE + 2][MAX_BLOCK_EDGE + 2];

      const int row = blockIdx.x * blockDim.x + threadIdx.x;
      const int col = blockIdx.y * blockDim.y + threadIdx.y;

      const int filter[3][3] = {
          { 1, -2, 1 }, { -2, 5, -2 }, { 1, -2, 1 }
      };

      const size_t stride = (size_t)w + 2;

      // Uniform across the block, so the barrier below is never divergent.
      const bool useTile = blockDim.x <= (unsigned)MAX_BLOCK_EDGE
                        && blockDim.y <= (unsigned)MAX_BLOCK_EDGE;

      if (useTile) {
          const int tileRows = (int)blockDim.x + 2;
          const int tileCols = (int)blockDim.y + 2;
          const int tileRow0 = blockIdx.x * blockDim.x;
          const int tileCol0 = blockIdx.y * blockDim.y;
          const int threadsInBlock = (int)(blockDim.x * blockDim.y);
          const int linearTid = (int)(threadIdx.y * blockDim.x + threadIdx.x);

          // Cooperative load: the patch has more cells than the block has
          // threads, so every thread strides over the flattened patch.
          for (int cell = linearTid; cell < tileRows * tileCols; cell += threadsInBlock) {
              const int tr = cell / tileCols;
              const int tc = cell % tileCols;
              const int gr = tileRow0 + tr;
              const int gc = tileCol0 + tc;
              // Cells past the padded image only feed out-of-range threads.
              tile[tr][tc] = (gr < h + 2 && gc < w + 2)
                  ? img_origin[(size_t)gr * stride + (size_t)gc]
                  : (unsigned char)0;
          }
      }

      // Every thread reaches the barrier, including those outside the image.
      __syncthreads();

      if (row >= h || col >= w)
          return;

      int sum = 0;
      for (int dr = 0; dr < 3; ++dr) {
          for (int dc = 0; dc < 3; ++dc) {
              const int pixel = useTile
                  ? tile[threadIdx.x + dr][threadIdx.y + dc]
                  : img_origin[(size_t)(row + dr) * stride + (size_t)(col + dc)];
              sum += pixel * filter[dr][dc];
          }
      }

      img_new[(size_t)row * w + col] = (unsigned char)(sum / dif);
  }
  82.  
  /*
   * Loads a PGM image, runs the 3x3 filter on the CPU and with both GPU
   * kernels, prints how many output bytes differ between the variants, and
   * writes the three results next to the input file.
   *
   * Returns EXIT_FAILURE when the input cannot be loaded or the CPU output
   * buffer cannot be allocated, 0 otherwise.
   */
  int main() {
      unsigned char* img = NULL;
      unsigned int w = 0;
      unsigned int h = 0;
      unsigned int chanels = 1;

      if (!__loadPPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\smile.pgm", &img, &w, &h, &chanels)
          || img == NULL || w == 0 || h == 0) {
          fprintf(stderr, "Failed to load the input image\n");
          return EXIT_FAILURE;
      }
      //img = creat300mpx();

      // Padded copy (one-pixel border) consumed by every filter variant.
      unsigned char* rebuild = rebuildImg(img, h, w);

      // Only the CPU path writes into a caller-provided buffer; GPU() and
      // GPUShared() return their own allocations, so none is made for them.
      unsigned char* cpuImg = (unsigned char*)calloc((size_t)w * h, sizeof(unsigned char));
      if (cpuImg == NULL) {
          fprintf(stderr, "Failed to allocate the CPU output buffer\n");
          delete[] rebuild;
          free(img);
          return EXIT_FAILURE;
      }

      showImg(img, h, w);

      cpuImg = CPU(rebuild, cpuImg, h, w);
      unsigned char* gpuImg = GPU(rebuild, h, w);
      unsigned char* gpuImgShared = GPUShared(rebuild, h, w);

      std::cout << h << "x" << w << std::endl;

      compose(cpuImg, gpuImg, h, w);
      compose(gpuImgShared, gpuImg, h, w);

      if (!__savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\cpu.pgm", cpuImg, w, h, chanels))
          fprintf(stderr, "Failed to save cpu.pgm\n");
      if (!__savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\gpu.pgm", gpuImg, w, h, chanels))
          fprintf(stderr, "Failed to save gpu.pgm\n");
      if (!__savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\gpuShared.pgm", gpuImgShared, w, h, chanels))
          fprintf(stderr, "Failed to save gpuShared.pgm\n");

      // cpuImg comes from calloc; the GPU results and the padded copy come
      // from new[].
      free(cpuImg);
      delete[] gpuImg;
      delete[] gpuImgShared;
      delete[] rebuild;
      // NOTE(review): assumes __loadPPM allocates with malloc, as the CUDA
      // samples' helper_image.h does - confirm against the bundled header.
      free(img);

      system("pause");
      return 0;
  }
  116.  
  // Compares the first h*w bytes of matrix1 and matrix2 and prints the number
  // of positions at which they differ, followed by a newline.
  void compose(unsigned char* matrix1, unsigned char* matrix2, int h, int w) {
      const int total = h * w;
      int mismatches = 0;

      int pos = 0;
      while (pos < total) {
          mismatches += (matrix1[pos] != matrix2[pos]) ? 1 : 0;
          ++pos;
      }

      std::cout << mismatches << std::endl;
  }
  125.  
  // Writes an N-row, M-column byte matrix to stdout. Each row is preceded by
  // a line break, every byte is streamed as a character (not as a number),
  // and one final line break follows the last row.
  void showImg(unsigned char* matrix, int N, int M) {
      for (int row = 0; row < N; ++row) {
          std::cout << std::endl;
          const unsigned char* line = matrix + row * M;
          for (int col = 0; col < M; ++col)
              std::cout << line[col];
      }
      std::cout << std::endl;
  }
  135.  
  /*
   * Builds a padded copy of an h x w row-major image: the result has
   * (h + 2) rows of (w + 2) bytes, with the original pixels at offset (1, 1)
   * and the one-pixel border filled by replicating the nearest edge pixel
   * (corners take the corner pixel).
   *
   * img  : source image, h rows of w bytes.
   * h, w : source height and width in pixels.
   *
   * Returns a new[]-allocated buffer the caller must release with delete[],
   * or NULL when img is NULL or either dimension is not positive.
   */
  unsigned char* rebuildImg(unsigned char* img, int h, int w) {
      if (img == NULL || h <= 0 || w <= 0)
          return NULL;

      const size_t paddedW = (size_t)w + 2;
      const size_t paddedH = (size_t)h + 2;
      unsigned char* newImg = new unsigned char[paddedW * paddedH];

      for (size_t pr = 0; pr < paddedH; ++pr) {
          // Clamp to the nearest source row: padded row 0 mirrors row 0 and
          // padded row h + 1 mirrors row h - 1.
          size_t srcRow = (pr == 0) ? 0 : pr - 1;
          if (srcRow > (size_t)h - 1)
              srcRow = (size_t)h - 1;

          const unsigned char* src = img + srcRow * (size_t)w;
          unsigned char* dst = newImg + pr * paddedW;

          dst[0] = src[0];                 // left border
          for (int c = 0; c < w; ++c)
              dst[c + 1] = src[c];         // interior
          dst[w + 1] = src[w - 1];         // right border
      }

      return newImg;
  }
  165.  
  // CPU reference: runs the 3x3 filter {1,-2,1 / -2,5,-2 / 1,-2,1} with
  // divisor 1 over every pixel of the padded image img, storing the h x w
  // result in newImg row by row. Prints the wall-clock duration of the loop
  // in milliseconds and returns newImg.
  unsigned char* CPU(unsigned char* img, unsigned char* newImg, int h, int w) {
      int kernel[3][3] = {
          { 1, -2, 1 },
          { -2, 5, -2 },
          { 1, -2, 1 }
      };
      const int divisor = 1;

      const std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();

      // Output is row-major, so a running pointer visits the same cells as
      // indexing with row * w + col.
      unsigned char* out = newImg;
      for (int row = 0; row < h; ++row)
          for (int col = 0; col < w; ++col)
              *out++ = filterCPU(img, row, col, w, kernel, divisor);

      const std::chrono::steady_clock::time_point finish = std::chrono::steady_clock::now();
      const std::chrono::duration<double, std::milli> elapsed = finish - begin;

      std::cout << "CPU time:" << elapsed.count() << " ms" << std::endl;
      return newImg;
  }
  184.  
  // Computes output pixel (i, j): the 3x3 window of the padded image img
  // whose top-left corner is at row i, column j (row stride w + 2) is weighted
  // by filter, summed, divided by divisionCoef and converted to unsigned char.
  // The conversion does not clamp, so results outside 0..255 wrap.
  unsigned char filterCPU(unsigned char* img, int i, int j, int w, int filter[3][3], int divisionCoef) {
      const int stride = w + 2;

      int acc = 0;
      for (int dy = 0; dy < 3; ++dy) {
          const unsigned char* window = img + (i + dy) * stride + j;
          for (int dx = 0; dx < 3; ++dx)
              acc += window[dx] * filter[dy][dx];
      }

      return (unsigned char)(acc / divisionCoef);
  }
  194.  
  // Signature shared by the GPUfunc and GPUfuncShared kernels.
  typedef void (*FilterKernel)(unsigned char*, unsigned char*, int, int, int);

  // Prints message plus the CUDA error text and terminates the process when
  // status is not cudaSuccess.
  static void abortOnCudaError(cudaError_t status, const char* message) {
      if (status != cudaSuccess) {
          fprintf(stderr, "%s (%s)\n", message, cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  /*
   * Runs one filter kernel over the padded host image and returns the result.
   *
   * kernel      : kernel to launch (GPUfunc or GPUfuncShared).
   * timingLabel : text printed in front of the measured kernel time.
   * img         : host buffer with the padded image, (h + 2) x (w + 2) bytes.
   * h, w        : height and width of the output image.
   *
   * Returns a new[]-allocated h x w host buffer (release with delete[]), or
   * NULL when img is NULL or a dimension is not positive. Any CUDA failure
   * terminates the process.
   */
  static unsigned char* runFilterOnDevice(FilterKernel kernel, const char* timingLabel,
                                          unsigned char* img, int h, int w) {
      if (img == NULL || h <= 0 || w <= 0)
          return NULL;

      const int dif = 1;
      const size_t paddedBytes = ((size_t)h + 2) * ((size_t)w + 2) * sizeof(unsigned char);
      const size_t outputBytes = (size_t)h * (size_t)w * sizeof(unsigned char);

      unsigned char* result = new unsigned char[outputBytes];
      unsigned char* img_origin = NULL;
      unsigned char* img_new = NULL;

      abortOnCudaError(cudaMalloc((void**)&img_origin, paddedBytes), "cudaMalloc failed!");
      abortOnCudaError(cudaMemcpy(img_origin, img, paddedBytes, cudaMemcpyHostToDevice), "cudaMemcpy failed!");
      abortOnCudaError(cudaMalloc((void**)&img_new, outputBytes), "cudaMalloc failed!");

      cudaEvent_t startTime;
      cudaEvent_t stopTime;
      abortOnCudaError(cudaEventCreate(&startTime), "cudaEventCreate failed!");
      abortOnCudaError(cudaEventCreate(&stopTime), "cudaEventCreate failed!");

      // The kernels map the x dimension to image rows and y to columns, so
      // grid.x is derived from h and grid.y from w (both rounded up).
      dim3 bl(8, 4);
      dim3 grid((h + bl.x - 1) / bl.x, (w + bl.y - 1) / bl.y);

      abortOnCudaError(cudaEventRecord(startTime), "cudaEventRecord failed!");
      kernel<<<grid, bl>>>(img_origin, img_new, w, h, dif);
      // A bad launch configuration is only reported through cudaGetLastError.
      abortOnCudaError(cudaGetLastError(), "kernel launch failed!");
      abortOnCudaError(cudaEventRecord(stopTime), "cudaEventRecord failed!");
      // Waiting on the stop event also surfaces faults raised inside the kernel.
      abortOnCudaError(cudaEventSynchronize(stopTime), "kernel execution failed!");

      float resultTime = 0.0f;
      abortOnCudaError(cudaEventElapsedTime(&resultTime, startTime, stopTime), "cudaEventElapsedTime failed!");
      std::cout << timingLabel << resultTime << " ms" << std::endl;

      abortOnCudaError(cudaMemcpy(result, img_new, outputBytes, cudaMemcpyDeviceToHost), "cudaMemcpy failed!");

      cudaEventDestroy(startTime);
      cudaEventDestroy(stopTime);
      cudaFree(img_new);
      cudaFree(img_origin);

      return result;
  }

  // Filters the padded image img with the shared-memory kernel GPUfuncShared,
  // prints "GPU Shared time:<ms> ms" and returns a new[]-allocated h x w
  // result (NULL for a NULL image or non-positive dimensions).
  unsigned char* GPUShared(unsigned char* img, int h, int w) {
      return runFilterOnDevice(GPUfuncShared, "GPU Shared time:", img, h, w);
  }

  // Filters the padded image img with the global-memory kernel GPUfunc,
  // prints "GPU time:<ms> ms" and returns a new[]-allocated h x w result
  // (NULL for a NULL image or non-positive dimensions).
  unsigned char* GPU(unsigned char* img, int h, int w) {
      return runFilterOnDevice(GPUfunc, "GPU time:", img, h, w);
  }
  338.  
  /*
   * Allocates a 30000 * 10000 byte buffer (300 megapixels, one byte each)
   * and fills it with pseudo-random values in 0..199 taken from rand().
   *
   * Returns the buffer, which the caller releases with free(), or NULL when
   * the allocation fails. The function does not report the image dimensions.
   */
  unsigned char* creat300mpx() {
      const size_t pixelCount = (size_t)30000 * 10000;

      // Every byte is overwritten below, so zero-initialization is not needed.
      unsigned char* image = (unsigned char*)malloc(pixelCount * sizeof(unsigned char));
      if (image == NULL) {
          fprintf(stderr, "creat300mpx: failed to allocate %lu bytes\n", (unsigned long)pixelCount);
          return NULL;
      }

      for (size_t i = 0; i < pixelCount; i++)
      {
          // The cast binds tighter than %: rand() is first truncated to a
          // byte and only then reduced modulo 200.
          image[i] = ((unsigned char)rand()) % 200;
      }
      return image;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement