Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define NUM_OF_THREADS 512
- #define IMAGE_NAME "image.jpg"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ratio>
#include <stdio.h>
#include <string>
#include <vector>

#include <sys/types.h>
#include <dirent.h>

#include <mpi/mpi.h>
#include <opencv4/opencv2/opencv.hpp>
/**
 * One pixel made of three byte-sized channels.
 * The member order (blue, green, red) is the order in which the conversion
 * helpers in this file read and write consecutive bytes of raw image data.
 */
struct bgrPixel
{
    uchar blue, green, red;
};
/**
 * Bookkeeping record the master keeps for every image it hands out.
 */
struct processInfo
{
    int number;       // rank of the process the image was sent to
    int rows;         // image height in pixels
    int columns;      // image width in pixels
    int channels;     // number of channels per pixel
    char name[256];   // file name of the image (without directory)
};
/**
 * Returns pixels[index] when index lies in [0, count), otherwise 0.
 * Used to fill the shared tile so that anything outside the image reads as zero.
 */
__device__ __forceinline__ uchar loadGrayOrZero(const uchar *pixels, int index, int count) {
    return (index >= 0 && index < count) ? pixels[index] : (uchar)0;
}

/**
 * Applies the fixed 3x3 filter {{0,1,0},{1,0,-1},{0,-1,0}} to a single-channel
 * image stored row-major in `pixels`, writing clamped [0,255] results to `output`.
 * Pixels outside the image are treated as zero.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per pixel,
 * gridDim.x * blockDim.x >= rows * columns, and blockDim.x <= NUM_OF_THREADS
 * (the static shared tile holds NUM_OF_THREADS + 2 slots per line).
 *
 * @param pixels  device buffer with rows * columns input bytes
 * @param output  device buffer with rows * columns output bytes
 * @param rows    device pointer to the image height
 * @param columns device pointer to the image width
 */
__global__ void filterImageOneChannel(uchar *pixels, uchar *output, const int *rows, const int *columns) {
    const int width = *columns;
    const int pixelCount = *rows * width;
    const int position = blockDim.x * blockIdx.x + threadIdx.x;

    // tile[0] / tile[1] / tile[2] hold the line above / the current line / the line below.
    // Slot threadIdx.x + 1 belongs to this thread; slot 0 and slot blockDim.x + 1 are halos.
    __shared__ uchar tile[3][NUM_OF_THREADS + 2];

    // Every thread (even one past the end of the image) fills its own slot, so
    // no slot that a valid thread may read is left uninitialised and all threads
    // reach the barrier below.
    const int slot = threadIdx.x + 1;
    tile[0][slot] = loadGrayOrZero(pixels, position - width, pixelCount);
    tile[1][slot] = loadGrayOrZero(pixels, position, pixelCount);
    tile[2][slot] = loadGrayOrZero(pixels, position + width, pixelCount);

    // Left halo: the flat-index predecessor of the block's first pixel.
    if (threadIdx.x == 0) {
        tile[0][0] = loadGrayOrZero(pixels, position - 1 - width, pixelCount);
        tile[1][0] = loadGrayOrZero(pixels, position - 1, pixelCount);
        tile[2][0] = loadGrayOrZero(pixels, position - 1 + width, pixelCount);
    }

    // Right halo: the flat-index successor of the block's last pixel.
    if (threadIdx.x == blockDim.x - 1) {
        tile[0][slot + 1] = loadGrayOrZero(pixels, position + 1 - width, pixelCount);
        tile[1][slot + 1] = loadGrayOrZero(pixels, position + 1, pixelCount);
        tile[2][slot + 1] = loadGrayOrZero(pixels, position + 1 + width, pixelCount);
    }

    __syncthreads();

    // Only now may surplus threads of the last block leave.
    if (position >= pixelCount) {
        return;
    }

    // The tile is a run of the flattened image, so the flat-index neighbours of
    // a pixel in the first/last column belong to a different image row. Mask
    // them out instead of letting the filter wrap around.
    const int column = position % width;
    const bool hasLeft = column != 0;
    const bool hasRight = column != width - 1;

    const int filter[3][3] = {
        {0, 1, 0},
        {1, 0, -1},
        {0, -1, 0}};

    int result = 0;
#pragma unroll
    for (int r = 0; r < 3; ++r) {
#pragma unroll
        for (int c = 0; c < 3; ++c) {
            if ((c == 0 && !hasLeft) || (c == 2 && !hasRight)) {
                continue;
            }
            result += filter[r][c] * tile[r][threadIdx.x + c];
        }
    }

    result = result < 0 ? 0 : (result > 255 ? 255 : result);
    output[position] = (uchar)result;
}
/**
 * Returns pixels[index] when index lies in [0, count), otherwise an all-zero pixel.
 * Used to fill the shared tile so that anything outside the image reads as zero.
 */
__device__ __forceinline__ bgrPixel loadBgrOrZero(const bgrPixel *pixels, int index, int count) {
    if (index >= 0 && index < count) {
        return pixels[index];
    }
    bgrPixel zero;
    zero.blue = 0;
    zero.green = 0;
    zero.red = 0;
    return zero;
}

/** Clamps a filter sum to the representable channel range [0, 255]. */
__device__ __forceinline__ uchar clampBgrChannel(int value) {
    return (uchar)(value < 0 ? 0 : (value > 255 ? 255 : value));
}

/**
 * Applies the fixed 3x3 filter {{0,1,0},{1,0,-1},{0,-1,0}} independently to the
 * blue, green and red channels of a row-major image, writing clamped [0,255]
 * results to `output`. Pixels outside the image are treated as zero.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per pixel,
 * gridDim.x * blockDim.x >= rows * columns, and blockDim.x <= NUM_OF_THREADS
 * (the static shared tile holds NUM_OF_THREADS + 2 slots per line).
 *
 * @param pixels  device buffer with rows * columns input pixels
 * @param output  device buffer with rows * columns output pixels
 * @param rows    device pointer to the image height
 * @param columns device pointer to the image width
 */
__global__ void filterImageThreeChannels(bgrPixel *pixels, bgrPixel *output, const int *rows, const int *columns) {
    const int width = *columns;
    const int pixelCount = *rows * width;
    const int position = blockDim.x * blockIdx.x + threadIdx.x;

    // tile[0] / tile[1] / tile[2] hold the line above / the current line / the line below.
    // Slot threadIdx.x + 1 belongs to this thread; slot 0 and slot blockDim.x + 1 are halos.
    __shared__ bgrPixel tile[3][NUM_OF_THREADS + 2];

    // Every thread (even one past the end of the image) fills its own slot, so
    // no slot that a valid thread may read is left uninitialised and all threads
    // reach the barrier below.
    const int slot = threadIdx.x + 1;
    tile[0][slot] = loadBgrOrZero(pixels, position - width, pixelCount);
    tile[1][slot] = loadBgrOrZero(pixels, position, pixelCount);
    tile[2][slot] = loadBgrOrZero(pixels, position + width, pixelCount);

    // Left halo: the flat-index predecessor of the block's first pixel.
    if (threadIdx.x == 0) {
        tile[0][0] = loadBgrOrZero(pixels, position - 1 - width, pixelCount);
        tile[1][0] = loadBgrOrZero(pixels, position - 1, pixelCount);
        tile[2][0] = loadBgrOrZero(pixels, position - 1 + width, pixelCount);
    }

    // Right halo: the flat-index successor of the block's last pixel.
    if (threadIdx.x == blockDim.x - 1) {
        tile[0][slot + 1] = loadBgrOrZero(pixels, position + 1 - width, pixelCount);
        tile[1][slot + 1] = loadBgrOrZero(pixels, position + 1, pixelCount);
        tile[2][slot + 1] = loadBgrOrZero(pixels, position + 1 + width, pixelCount);
    }

    __syncthreads();

    // Only now may surplus threads of the last block leave.
    if (position >= pixelCount) {
        return;
    }

    // The tile is a run of the flattened image, so the flat-index neighbours of
    // a pixel in the first/last column belong to a different image row. Mask
    // them out instead of letting the filter wrap around.
    const int column = position % width;
    const bool hasLeft = column != 0;
    const bool hasRight = column != width - 1;

    const int filter[3][3] = {
        {0, 1, 0},
        {1, 0, -1},
        {0, -1, 0}};

    int resultBlue = 0;
    int resultGreen = 0;
    int resultRed = 0;
#pragma unroll
    for (int r = 0; r < 3; ++r) {
#pragma unroll
        for (int c = 0; c < 3; ++c) {
            if ((c == 0 && !hasLeft) || (c == 2 && !hasRight)) {
                continue;
            }
            const bgrPixel neighbour = tile[r][threadIdx.x + c];
            resultBlue += filter[r][c] * neighbour.blue;
            resultGreen += filter[r][c] * neighbour.green;
            resultRed += filter[r][c] * neighbour.red;
        }
    }

    output[position].blue = clampBgrChannel(resultBlue);
    output[position].green = clampBgrChannel(resultGreen);
    output[position].red = clampBgrChannel(resultRed);
}
/**
 * Copies an image into a freshly malloc'ed, row-major array of bgrPixel.
 * Each element is read as a cv::Vec3b whose components 0, 1, 2 become
 * blue, green, red. The caller owns the returned buffer.
 */
__host__ bgrPixel *imageToPixelsBGR(cv::Mat image){
    bgrPixel *pixels = (bgrPixel *)malloc(sizeof(bgrPixel) * image.cols * image.rows);
    for (int y = 0; y < image.rows; ++y) {
        bgrPixel *destinationRow = pixels + y * image.cols;
        for (int x = 0; x < image.cols; ++x) {
            const cv::Vec3b &source = image.at<cv::Vec3b>(y, x);
            bgrPixel &destination = destinationRow[x];
            destination.blue = source[0];
            destination.green = source[1];
            destination.red = source[2];
        }
    }
    return pixels;
}
/**
 * Converts interleaved raw bytes (blue, green, red, blue, green, red, ...)
 * into a freshly malloc'ed array of rows * cols bgrPixel values.
 * The caller owns the returned buffer.
 */
__host__ bgrPixel *dataToBGRPixels(uchar *data, int rows, int cols){
    bgrPixel *pixels = (bgrPixel *)malloc(sizeof(bgrPixel) * cols * rows);
    const int total = rows * cols;
    int pixelIndex = 0;
    while (pixelIndex < total) {
        const uchar *triple = data + pixelIndex * 3;
        bgrPixel &destination = pixels[pixelIndex];
        destination.blue = triple[0];
        destination.green = triple[1];
        destination.red = triple[2];
        ++pixelIndex;
    }
    return pixels;
}
/**
 * Flattens n bgrPixel values into a freshly malloc'ed buffer of 3 * n
 * interleaved bytes (blue, green, red per pixel) - the inverse of
 * dataToBGRPixels. The caller owns the returned buffer.
 *
 * @param pixels array of at least n pixels
 * @param n      number of pixels to convert
 * @return buffer of n * 3 bytes
 */
__host__ uchar *BGRPixelsToRawData(bgrPixel *pixels, int n){
    uchar *data = (uchar *)malloc(sizeof(uchar) * n * 3);
    for (int i = 0; i < n; i++) {
        data[i * 3] = pixels[i].blue;
        data[i * 3 + 1] = pixels[i].green;
        // Red belongs in the third byte; writing it to +1 would overwrite green
        // and leave the third byte uninitialised.
        data[i * 3 + 2] = pixels[i].red;
    }
    return data;
}
/** Aborts the whole MPI job with a diagnostic when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
}

/**
 * Runs one of the filter kernels on a rows x columns image: uploads the input,
 * launches one thread per pixel in blocks of NUM_OF_THREADS, prints the kernel
 * time, downloads the result and releases every device resource it created.
 */
template <typename Pixel>
static void filterOnDevice(void (*kernel)(Pixel *, Pixel *, const int *, const int *),
                           const Pixel *hostInput, Pixel *hostOutput, int rows, int columns)
{
    const size_t pixelCount = (size_t)rows * (size_t)columns;
    const size_t bytes = pixelCount * sizeof(Pixel);

    Pixel *deviceInput = NULL;
    Pixel *deviceOutput = NULL;
    int *deviceRows = NULL;
    int *deviceColumns = NULL;
    checkCuda(cudaMalloc(&deviceInput, bytes), "cudaMalloc(input)");
    checkCuda(cudaMalloc(&deviceOutput, bytes), "cudaMalloc(output)");
    checkCuda(cudaMalloc(&deviceRows, sizeof(int)), "cudaMalloc(rows)");
    checkCuda(cudaMalloc(&deviceColumns, sizeof(int)), "cudaMalloc(columns)");
    checkCuda(cudaMemcpy(deviceInput, hostInput, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(input)");
    checkCuda(cudaMemcpy(deviceRows, &rows, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(rows)");
    checkCuda(cudaMemcpy(deviceColumns, &columns, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy(columns)");

    // Ceiling division so a partially filled last block covers the image tail.
    const unsigned int blockCount = (unsigned int)((pixelCount + NUM_OF_THREADS - 1) / NUM_OF_THREADS);

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");
    kernel<<<blockCount, NUM_OF_THREADS>>>(deviceInput, deviceOutput, deviceRows, deviceColumns);
    // Launch-configuration errors are only visible through cudaGetLastError().
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    // Faults raised while the kernel runs surface at this synchronising call.
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

    float elapsedTime = 0.0f;
    checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    // cudaEventElapsedTime reports milliseconds; print seconds.
    printf("time - %f", elapsedTime * 0.001);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    checkCuda(cudaMemcpy(hostOutput, deviceOutput, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(output)");

    cudaFree(deviceInput);
    cudaFree(deviceOutput);
    cudaFree(deviceRows);
    cudaFree(deviceColumns);
}

/** Master side: receives the filtered bytes of one job from its worker and writes them to filteredImages/. */
static void receiveAndSaveResult(const processInfo &job)
{
    const int byteCount = job.columns * job.rows * job.channels;
    std::vector<uchar> filtered(byteCount);
    MPI_Recv(filtered.data(), byteCount, MPI_UNSIGNED_CHAR, job.number, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    // Wrap the received bytes without copying; the vector outlives the Mat.
    cv::Mat image(job.rows, job.columns, CV_8UC(job.channels), filtered.data());
    const std::string savePath = std::string("filteredImages/filtered") + job.name;
    if (!cv::imwrite(savePath, image)) {
        fprintf(stderr, "Could not write %s\n", savePath.c_str());
    }
}

/**
 * Master side: hands every image found in ./images to the workers in
 * round-robin order, stores the filtered results and finally tells every
 * worker to stop. Returns the process exit code.
 */
static int runMaster(int quantityOfProcesses)
{
    if (quantityOfProcesses < 2) {
        fprintf(stderr, "At least one worker process is required\n");
        return 1;
    }

    DIR *dir = opendir("./images");
    if (!dir) {
        perror("diropen");
        // The workers are blocked waiting for a command; take the whole job down.
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    // At most one job is in flight per worker. Before a worker gets a new
    // image its previous result is collected, so master and worker can never
    // both sit in a blocking send towards each other.
    std::vector<processInfo> inFlight(quantityOfProcesses);
    std::vector<char> busy(quantityOfProcesses, 0);
    int currentProcessNumber = 1;

    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        if (entry->d_name[0] == '.') {
            continue;
        }

        const std::string path = std::string("images/") + entry->d_name;
        cv::Mat image = cv::imread(path);
        if (image.empty()) {
            fprintf(stderr, "Skipping %s: not a readable image\n", path.c_str());
            continue;
        }
        if (image.channels() != 1 && image.channels() != 3) {
            fprintf(stderr, "Skipping %s: unsupported channel count %d\n", path.c_str(), image.channels());
            continue;
        }
        if (!image.isContinuous()) {
            // The raw send below needs one contiguous buffer.
            image = image.clone();
        }

        if (busy[currentProcessNumber]) {
            receiveAndSaveResult(inFlight[currentProcessNumber]);
            busy[currentProcessNumber] = 0;
        }

        processInfo current;
        snprintf(current.name, sizeof(current.name), "%s", entry->d_name);
        current.columns = image.cols;
        current.rows = image.rows;
        current.number = currentProcessNumber;
        current.channels = image.channels();

        char command = 'r';
        MPI_Send(&command, 1, MPI_CHAR, currentProcessNumber, 0, MPI_COMM_WORLD);
        MPI_Send(&current.channels, 1, MPI_INT, currentProcessNumber, 0, MPI_COMM_WORLD);
        MPI_Send(&current.rows, 1, MPI_INT, currentProcessNumber, 0, MPI_COMM_WORLD);
        MPI_Send(&current.columns, 1, MPI_INT, currentProcessNumber, 0, MPI_COMM_WORLD);
        MPI_Send(image.data, current.rows * current.columns * current.channels, MPI_UNSIGNED_CHAR,
                 currentProcessNumber, 0, MPI_COMM_WORLD);

        inFlight[currentProcessNumber] = current;
        busy[currentProcessNumber] = 1;

        currentProcessNumber++;
        if (currentProcessNumber == quantityOfProcesses) {
            currentProcessNumber = 1;
        }
    }
    closedir(dir);

    // Collect whatever is still being processed.
    for (int worker = 1; worker < quantityOfProcesses; worker++) {
        if (busy[worker]) {
            receiveAndSaveResult(inFlight[worker]);
            busy[worker] = 0;
        }
    }

    for (int worker = 1; worker < quantityOfProcesses; worker++) {
        char command = 'e';
        MPI_Send(&command, 1, MPI_CHAR, worker, 0, MPI_COMM_WORLD);
    }
    return 0;
}

/**
 * Worker side: loops receiving images from rank 0, filters each one on the GPU
 * and sends the filtered bytes back, until the 'e' command arrives.
 */
static void runWorker(const char *processorName)
{
    while (true) {
        char command;
        MPI_Recv(&command, 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        if (command == 'e') {
            break;
        }

        int channels, rows, columns;
        MPI_Recv(&channels, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Recv(&columns, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

        const int pixelCount = rows * columns;
        const int byteCount = pixelCount * channels;
        std::vector<uchar> data(byteCount);
        MPI_Recv(data.data(), byteCount, MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        std::cout << "Image received on " << processorName << std::endl;

        if (channels == 1) {
            std::vector<uchar> filteredData(pixelCount);
            filterOnDevice<uchar>(filterImageOneChannel, data.data(), filteredData.data(), rows, columns);
            MPI_Send(filteredData.data(), pixelCount, MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD);
        } else {
            bgrPixel *dataBGR = dataToBGRPixels(data.data(), rows, columns);
            std::vector<bgrPixel> filteredDataBGR(pixelCount);
            filterOnDevice<bgrPixel>(filterImageThreeChannels, dataBGR, filteredDataBGR.data(), rows, columns);
            uchar *filteredData = BGRPixelsToRawData(filteredDataBGR.data(), pixelCount);
            MPI_Send(filteredData, byteCount, MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD);
            free(filteredData);
            free(dataBGR);
        }
    }
}

/**
 * Entry point. Rank 0 acts as master and distributes the images in ./images;
 * every other rank acts as a GPU worker. All ranks meet at a barrier and then
 * shut MPI down.
 */
int main()
{
    MPI_Init(NULL, NULL);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int namelen;
    char processorName[MPI_MAX_PROCESSOR_NAME];
    MPI_Get_processor_name(processorName, &namelen);

    int exitCode = 0;
    if (rank == 0) {
        std::cout << "My rank is " << rank << " and i am master. I am running on " << processorName << std::endl;
        int quantityOfProcesses;
        MPI_Comm_size(MPI_COMM_WORLD, &quantityOfProcesses);
        exitCode = runMaster(quantityOfProcesses);
    } else {
        runWorker(processorName);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return exitCode;
}
Advertisement
RAW Paste Data
Copied
Advertisement