Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <cuda.h>
- #include <device_functions.h>
- #include <cuda_runtime_api.h>
- #include <stdio.h>
- #include <cstdlib>
- #include <iostream>
- #include <ostream>
- #include <iomanip>
- #include <windows.h>
- #include <conio.h>
- #include <cstdint>
- #include <chrono>
- #include <npp.h>
- #include "../common/inc/helper_cuda.h"
- #include "../common/inc/helper_image.h"
using namespace std;

// ---- Device kernels -------------------------------------------------------
// Both kernels apply the same 3x3 filter to a padded (h + 2) x (w + 2) image
// and write an h x w result; `dif` is the divisor applied to the weighted sum.
__global__ void GPUfunc(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif);
__global__ void GPUfuncShared(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif);

// ---- Host wrappers around the kernels -------------------------------------
// Copy the padded image to the device, launch and time the kernel, and return
// a new[]-allocated h x w result.
unsigned char* GPU(unsigned char* img, int h, int w);
unsigned char* GPUShared(unsigned char* img, int h, int w);

// ---- CPU reference implementation -----------------------------------------
unsigned char* CPU(unsigned char* img, unsigned char* newImg, int h, int w);
unsigned char filterCPU(unsigned char* img, int i, int j, int w, int filter[3][3], int divisionCoef);

// ---- Image utilities ------------------------------------------------------
// showImg:     streams every pixel of an N x M image to stdout as a character.
// rebuildImg:  returns a new[]-allocated copy of `img` with a one-pixel border.
// compose:     prints how many of the first h * w bytes differ between buffers.
// creat300mpx: returns a calloc-allocated 30000 x 10000 buffer of random bytes.
void showImg(unsigned char* matrix, int N, int M);
unsigned char* rebuildImg(unsigned char* img, int h, int w);
void compose(unsigned char* matrix1, unsigned char* matrix2,int h,int w);
unsigned char* creat300mpx();
/**
 * Applies the 3x3 filter {{1,-2,1},{-2,5,-2},{1,-2,1}} to a padded 8-bit
 * image, one thread per output pixel, reading straight from global memory.
 *
 * Launch layout: a 2D grid in which the x dimension (blockIdx.x / threadIdx.x)
 * walks image ROWS and must cover `h`, and the y dimension walks COLUMNS and
 * must cover `w`. The grid may overshoot the image; surplus threads exit.
 * No shared memory is used.
 *
 * @param img_origin device pointer to the padded input, (h + 2) rows of
 *                   (w + 2) bytes each
 * @param img_new    device pointer to the output, h rows of w bytes each
 * @param w          output width in pixels
 * @param h          output height in pixels
 * @param dif        divisor applied to the weighted sum; must be non-zero
 *
 * The quotient is narrowed to unsigned char without clamping (values outside
 * 0..255 wrap modulo 256), which matches filterCPU so results stay comparable.
 */
__global__ void GPUfunc(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif) {
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    const int col = blockIdx.y * blockDim.y + threadIdx.y;

    // The host rounds the grid up, so threads past the last row/column must
    // not touch memory: without this guard they read and write out of bounds.
    if (row >= h || col >= w)
        return;

    const int filter[3][3] = {
        { 1, -2,  1 },
        { -2, 5, -2 },
        { 1, -2,  1 }
    };

    const size_t stride = (size_t)w + 2;   // row pitch of the padded input
    int acc = 0;
    for (int dr = 0; dr < 3; ++dr) {
        // Output pixel (row, col) is centred on padded pixel (row + 1, col + 1),
        // so the window's top-left corner is padded pixel (row, col).
        const unsigned char* line = img_origin + (size_t)(row + dr) * stride + col;
        for (int dc = 0; dc < 3; ++dc)
            acc += line[dc] * filter[dr][dc];
    }

    img_new[(size_t)row * w + col] = (unsigned char)(acc / dif);
}
/**
 * Shared-memory variant of the 3x3 filter {{1,-2,1},{-2,5,-2},{1,-2,1}}.
 *
 * Each block first stages its input tile plus the surrounding halo in shared
 * memory, so every padded pixel is fetched from global memory once per block
 * instead of up to nine times, then each thread filters one output pixel.
 *
 * Launch layout: a 2D grid in which the x dimension walks image ROWS (must
 * cover `h`) and the y dimension walks COLUMNS (must cover `w`). The grid may
 * overshoot the image. Static shared memory: (32 + 2) * (32 + 2) bytes.
 * Blocks larger than 32 threads in x or y skip the tile and read global
 * memory directly.
 *
 * @param img_origin device pointer to the padded input, (h + 2) rows of
 *                   (w + 2) bytes each
 * @param img_new    device pointer to the output, h rows of w bytes each
 * @param w          output width in pixels
 * @param h          output height in pixels
 * @param dif        divisor applied to the weighted sum; must be non-zero
 *
 * The quotient is narrowed to unsigned char without clamping (wraps modulo
 * 256), matching GPUfunc and filterCPU.
 */
__global__ void GPUfuncShared(unsigned char* img_origin, unsigned char* img_new, int w, int h, int dif) {
    enum { MAX_BLOCK_DIM = 32, HALO = 2 };
    __shared__ unsigned char tile[MAX_BLOCK_DIM + HALO][MAX_BLOCK_DIM + HALO];

    const int filter[3][3] = {
        { 1, -2,  1 },
        { -2, 5, -2 },
        { 1, -2,  1 }
    };

    const int rowBase = blockIdx.x * blockDim.x;   // first output row of this block
    const int colBase = blockIdx.y * blockDim.y;   // first output column of this block
    const int row = rowBase + (int)threadIdx.x;
    const int col = colBase + (int)threadIdx.y;
    const int paddedW = w + 2;
    const int paddedH = h + 2;

    // Uniform across the block, so the barrier below is never divergent.
    const bool useTile = blockDim.x <= MAX_BLOCK_DIM && blockDim.y <= MAX_BLOCK_DIM;

    if (useTile) {
        const int tileRows = (int)blockDim.x + HALO;
        const int tileCols = (int)blockDim.y + HALO;
        const int cellCount = tileRows * tileCols;
        const int threadCount = (int)(blockDim.x * blockDim.y);
        // threadIdx.x varies fastest within a warp; making it the low part of
        // the linear id lets neighbouring lanes load neighbouring bytes.
        const int tid = (int)(threadIdx.y * blockDim.x + threadIdx.x);

        for (int k = tid; k < cellCount; k += threadCount) {
            const int r = k / tileCols;
            const int c = k % tileCols;
            const int srcRow = rowBase + r;
            const int srcCol = colBase + c;
            // Tiles on the last block row/column hang over the padded image;
            // those cells stay unwritten and are never read by in-range threads.
            if (srcRow < paddedH && srcCol < paddedW)
                tile[r][c] = img_origin[(size_t)srcRow * paddedW + srcCol];
        }
        // Every thread of the block reaches this barrier, including threads
        // that map outside the image, because the bounds guard comes after it.
        __syncthreads();
    }

    if (row >= h || col >= w)
        return;

    int acc = 0;
    for (int dr = 0; dr < 3; ++dr) {
        for (int dc = 0; dc < 3; ++dc) {
            const int px = useTile
                ? tile[threadIdx.x + dr][threadIdx.y + dc]
                : img_origin[(size_t)(row + dr) * paddedW + (col + dc)];
            acc += px * filter[dr][dc];
        }
    }

    img_new[(size_t)row * w + col] = (unsigned char)(acc / dif);
}
/**
 * Loads a PGM image, filters it on the CPU and with both GPU kernels, prints
 * how many pixels differ between the results, and saves the three outputs.
 *
 * @return 0 on success, EXIT_FAILURE if the input cannot be loaded or a host
 *         allocation fails.
 */
int main() {
    unsigned char* img = NULL;
    unsigned int w = 0;
    unsigned int h = 0;
    unsigned int chanels = 1;

    // NOTE(review): relies on helper_image.h's __loadPPM returning false on
    // failure and malloc()-ing *data when it is NULL on entry - confirm against
    // the helper header shipped with the CUDA samples in use.
    const bool loaded = __loadPPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\smile.pgm", &img, &w, &h, &chanels);
    if (!loaded || img == NULL || w == 0 || h == 0) {
        std::cerr << "Failed to load the input image" << std::endl;
        system("pause");
        return EXIT_FAILURE;
    }
    //img = creat300mpx();

    unsigned char* rebuild = rebuildImg(img, h, w);
    unsigned char* cpuImg = (unsigned char*)calloc((size_t)w * h, sizeof(unsigned char));
    if (rebuild == NULL || cpuImg == NULL) {
        std::cerr << "Failed to allocate host image buffers" << std::endl;
        delete[] rebuild;
        free(cpuImg);
        free(img);
        system("pause");
        return EXIT_FAILURE;
    }

    showImg(img, h, w);

    // CPU() writes into and returns cpuImg. GPU()/GPUShared() allocate their
    // own result with new[], so no buffer is pre-allocated for them (the
    // previous calloc'd buffers were simply overwritten and leaked).
    cpuImg = CPU(rebuild, cpuImg, h, w);
    unsigned char* gpuImg = GPU(rebuild, h, w);
    unsigned char* gpuImgShared = GPUShared(rebuild, h, w);

    std::cout << h << "x" << w << std::endl;
    compose(cpuImg, gpuImg, h, w);
    compose(gpuImgShared, gpuImg, h, w);

    bool saved = true;
    saved = __savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\cpu.pgm", cpuImg, w, h, chanels) && saved;
    saved = __savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\gpu.pgm", gpuImg, w, h, chanels) && saved;
    saved = __savePPM("C:\\Users\\messi\\Documents\\Visual Studio 2015\\Projects\\AVP5neW\\AVP5neW\\gpuShared.pgm", gpuImgShared, w, h, chanels) && saved;
    if (!saved)
        std::cerr << "Warning: at least one output image could not be written" << std::endl;

    // Release each buffer with the deallocator matching its allocator.
    delete[] gpuImgShared;   // new[] in GPUShared
    delete[] gpuImg;         // new[] in GPU
    free(cpuImg);            // calloc above
    delete[] rebuild;        // new[] in rebuildImg
    free(img);               // presumably malloc'd by __loadPPM (see note above)

    system("pause");
    return 0;
}
/**
 * Compares two h x w byte buffers element by element and prints the number of
 * positions at which they differ, followed by a newline.
 *
 * @param matrix1 first buffer, at least h * w bytes
 * @param matrix2 second buffer, at least h * w bytes
 * @param h       number of rows
 * @param w       number of columns
 */
void compose(unsigned char* matrix1, unsigned char* matrix2,int h,int w) {
    const int total = h * w;
    int mismatches = 0;
    int idx = 0;
    while (idx < total) {
        mismatches += (matrix1[idx] != matrix2[idx]) ? 1 : 0;
        ++idx;
    }
    std::cout << mismatches << std::endl;
}
/**
 * Streams an N x M byte image to stdout, one character per pixel.
 *
 * A newline is written before every row and once more after the last row.
 *
 * @param matrix row-major pixel buffer of at least N * M bytes
 * @param N      number of rows
 * @param M      number of columns
 */
void showImg(unsigned char*matrix, int N, int M) {
    int offset = 0;   // running row-major index, equal to row * M + col
    for (int row = 0; row < N; ++row) {
        std::cout << std::endl;
        for (int col = 0; col < M; ++col) {
            std::cout << matrix[offset];
            ++offset;
        }
    }
    std::cout << std::endl;
}
/**
 * Builds a copy of `img` surrounded by a one-pixel border in which every
 * border pixel repeats the nearest image pixel (clamp-to-edge padding).
 *
 * The result has (h + 2) rows of (w + 2) bytes, row-major, which is the layout
 * the CPU and GPU filters index with a stride of w + 2.
 *
 * @param img row-major source image, h rows of w bytes
 * @param h   source height in pixels
 * @param w   source width in pixels
 * @return    new[]-allocated padded image (release with delete[]), or NULL
 *            when `img` is NULL or either dimension is not positive
 */
unsigned char* rebuildImg(unsigned char* img, int h, int w) {
    if (img == NULL || h <= 0 || w <= 0)
        return NULL;

    const size_t paddedW = (size_t)w + 2;
    const size_t paddedH = (size_t)h + 2;
    unsigned char* newImg = new unsigned char[paddedW * paddedH];

    for (int i = 0; i < h + 2; ++i) {
        // Padded row i mirrors source row i - 1, clamped into [0, h - 1]; this
        // yields the top/bottom borders and the four corners for any aspect
        // ratio (the old index formulas were only valid for square images).
        int srcRow = i - 1;
        if (srcRow < 0)
            srcRow = 0;
        else if (srcRow > h - 1)
            srcRow = h - 1;

        const unsigned char* src = img + (size_t)srcRow * w;
        unsigned char* dst = newImg + (size_t)i * paddedW;

        dst[0] = src[0];                 // left border
        for (int j = 0; j < w; ++j)
            dst[j + 1] = src[j];         // interior
        dst[w + 1] = src[w - 1];         // right border
    }
    return newImg;
}
/**
 * CPU reference: filters every pixel of the padded image with filterCPU,
 * measures the loop with a steady clock and prints the elapsed milliseconds.
 *
 * @param img    padded input, indexed by filterCPU with a row stride of w + 2
 * @param newImg caller-provided output buffer of at least h * w bytes
 * @param h      output height in pixels
 * @param w      output width in pixels
 * @return       `newImg`, now holding the filtered image in row-major order
 */
unsigned char* CPU(unsigned char* img,unsigned char* newImg, int h, int w) {
    int kernel[3][3] = {
        { 1, -2,  1 },
        { -2, 5, -2 },
        { 1, -2,  1 }
    };
    const int divisor = 1;

    const auto begin = std::chrono::steady_clock::now();

    // Rows are visited top to bottom and columns left to right, so a single
    // advancing pointer visits the same cells as indexing with row * w + col.
    unsigned char* out = newImg;
    for (int row = 0; row < h; ++row) {
        for (int col = 0; col < w; ++col) {
            *out = filterCPU(img, row, col, w, kernel, divisor);
            ++out;
        }
    }

    const auto finish = std::chrono::steady_clock::now();
    const std::chrono::duration<double, std::milli> elapsed = finish - begin;
    std::cout << "CPU time:" << elapsed.count() << " ms" << std::endl;

    return newImg;
}
/**
 * Computes one filtered pixel: the weighted sum of the 3x3 window whose
 * top-left corner is element (i, j) of the padded image, divided by
 * `divisionCoef` and narrowed to unsigned char.
 *
 * @param img          padded image with a row stride of w + 2 bytes
 * @param i            row of the window's top-left corner
 * @param j            column of the window's top-left corner
 * @param w            width of the unpadded image
 * @param filter       3x3 weights, filter[r][c] multiplies window cell (r, c)
 * @param divisionCoef integer divisor applied to the sum; must be non-zero
 * @return             the quotient converted to unsigned char (values outside
 *                     0..255 wrap modulo 256, there is no clamping)
 */
unsigned char filterCPU(unsigned char* img, int i, int j, int w, int filter[3][3], int divisionCoef) {
    const int stride = w + 2;
    int acc = 0;
    for (int dy = 0; dy < 3; ++dy) {
        const unsigned char* line = img + (i + dy) * stride + j;
        for (int dx = 0; dx < 3; ++dx)
            acc += line[dx] * filter[dy][dx];
    }
    return static_cast<unsigned char>(acc / divisionCoef);
}
/**
 * Runs GPUfuncShared over a padded image and returns the filtered result.
 *
 * Copies the (h + 2) x (w + 2) padded input to the device, launches the
 * kernel with 8 x 4 blocks (x walks rows, y walks columns, grid rounded up),
 * times the kernel with CUDA events, prints the elapsed milliseconds and
 * copies the h x w result back to the host.
 *
 * @param img padded host image, (h + 2) rows of (w + 2) bytes
 * @param h   output height in pixels
 * @param w   output width in pixels
 * @return    new[]-allocated h x w result; the caller releases it with delete[]
 *
 * Any CUDA failure (including a failed launch or a fault inside the kernel)
 * prints the failing step with the runtime's error string and exits the
 * process with EXIT_FAILURE.
 */
unsigned char* GPUShared(unsigned char* img, int h, int w) {
    // Single place for error handling so that no return code is ignored.
    auto check = [](cudaError_t status, const char* what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    const int dif = 1;
    // size_t arithmetic keeps the byte counts from overflowing int on huge images.
    const size_t paddedBytes = ((size_t)h + 2) * ((size_t)w + 2) * sizeof(unsigned char);
    const size_t resultBytes = (size_t)h * (size_t)w * sizeof(unsigned char);

    cudaEvent_t startTime;
    cudaEvent_t stopTime;
    check(cudaEventCreate(&startTime), "cudaEventCreate");
    check(cudaEventCreate(&stopTime), "cudaEventCreate");

    unsigned char* result = new unsigned char[(size_t)h * (size_t)w];

    unsigned char* img_origin = NULL;
    check(cudaMalloc((void**)&img_origin, paddedBytes), "cudaMalloc");
    check(cudaMemcpy(img_origin, img, paddedBytes, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)");

    unsigned char* img_new = NULL;
    check(cudaMalloc((void**)&img_new, resultBytes), "cudaMalloc");

    // The kernel maps x to rows and y to columns, so the grid is sized
    // accordingly; ceil-division covers a partial last block.
    dim3 bl(8, 4);
    dim3 grid((h + bl.x - 1) / bl.x, (w + bl.y - 1) / bl.y);

    check(cudaEventRecord(startTime), "cudaEventRecord");
    GPUfuncShared<<<grid, bl>>>(img_origin, img_new, w, h, dif);
    check(cudaGetLastError(), "GPUfuncShared launch");        // bad launch configuration
    check(cudaEventRecord(stopTime), "cudaEventRecord");
    check(cudaEventSynchronize(stopTime), "GPUfuncShared execution");   // waits for the kernel

    float resultTime = 0.0f;
    check(cudaEventElapsedTime(&resultTime, startTime, stopTime), "cudaEventElapsedTime");
    cout << "GPU Shared time:" << resultTime << " ms" << endl;

    check(cudaMemcpy(result, img_new, resultBytes, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");

    cudaFree(img_new);
    cudaFree(img_origin);
    cudaEventDestroy(startTime);
    cudaEventDestroy(stopTime);
    return result;
}
/**
 * Runs GPUfunc (the global-memory kernel) over a padded image and returns the
 * filtered result.
 *
 * Copies the (h + 2) x (w + 2) padded input to the device, launches the
 * kernel with 8 x 4 blocks (x walks rows, y walks columns, grid rounded up),
 * times the kernel with CUDA events, prints the elapsed milliseconds and
 * copies the h x w result back to the host.
 *
 * @param img padded host image, (h + 2) rows of (w + 2) bytes
 * @param h   output height in pixels
 * @param w   output width in pixels
 * @return    new[]-allocated h x w result; the caller releases it with delete[]
 *
 * Any CUDA failure (including a failed launch or a fault inside the kernel)
 * prints the failing step with the runtime's error string and exits the
 * process with EXIT_FAILURE.
 */
unsigned char* GPU(unsigned char* img, int h, int w) {
    // Single place for error handling so that no return code is ignored.
    auto check = [](cudaError_t status, const char* what) {
        if (status != cudaSuccess) {
            fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
            exit(EXIT_FAILURE);
        }
    };

    const int dif = 1;
    // size_t arithmetic keeps the byte counts from overflowing int on huge images.
    const size_t paddedBytes = ((size_t)h + 2) * ((size_t)w + 2) * sizeof(unsigned char);
    const size_t resultBytes = (size_t)h * (size_t)w * sizeof(unsigned char);

    cudaEvent_t startTime;
    cudaEvent_t stopTime;
    check(cudaEventCreate(&startTime), "cudaEventCreate");
    check(cudaEventCreate(&stopTime), "cudaEventCreate");

    unsigned char* result = new unsigned char[(size_t)h * (size_t)w];

    unsigned char* img_origin = NULL;
    check(cudaMalloc((void**)&img_origin, paddedBytes), "cudaMalloc");
    check(cudaMemcpy(img_origin, img, paddedBytes, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)");

    unsigned char* img_new = NULL;
    check(cudaMalloc((void**)&img_new, resultBytes), "cudaMalloc");

    // The kernel maps x to rows and y to columns, so the grid is sized
    // accordingly; ceil-division covers a partial last block.
    dim3 bl(8, 4);
    dim3 grid((h + bl.x - 1) / bl.x, (w + bl.y - 1) / bl.y);

    check(cudaEventRecord(startTime), "cudaEventRecord");
    GPUfunc<<<grid, bl>>>(img_origin, img_new, w, h, dif);
    check(cudaGetLastError(), "GPUfunc launch");              // bad launch configuration
    check(cudaEventRecord(stopTime), "cudaEventRecord");
    check(cudaEventSynchronize(stopTime), "GPUfunc execution");   // waits for the kernel

    float resultTime = 0.0f;
    check(cudaEventElapsedTime(&resultTime, startTime, stopTime), "cudaEventElapsedTime");
    cout << "GPU time:" << resultTime << " ms" << endl;

    check(cudaMemcpy(result, img_new, resultBytes, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");

    cudaFree(img_new);
    cudaFree(img_origin);
    cudaEventDestroy(startTime);
    cudaEventDestroy(stopTime);
    return result;
}
/**
 * Allocates a 30000 x 10000 single-channel test image and fills it with
 * pseudo-random bytes in the range 0..199 taken from rand().
 *
 * @return calloc-allocated buffer of 300,000,000 bytes (release with free()),
 *         or NULL when the allocation fails
 */
unsigned char* creat300mpx() {
    const size_t pixelCount = (size_t)30000 * 10000;

    unsigned char* image = (unsigned char*)calloc(pixelCount, sizeof(unsigned char));
    if (image == NULL) {
        // A ~300 MB request can realistically fail; report it rather than
        // writing through a null pointer.
        fprintf(stderr, "creat300mpx: failed to allocate %llu bytes\n", (unsigned long long)pixelCount);
        return NULL;
    }

    for (size_t i = 0; i < pixelCount; ++i)
    {
        // The cast binds tighter than %, so rand() is first truncated to its
        // low byte and only then reduced modulo 200.
        image[i] = (unsigned char)rand() % 200;
    }
    return image;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement