// Untitled

#include <cmath>
#include <fstream>
#include <iostream>
#include <limits>
#include <vector>

#include <cuda.h>

using namespace std;

/**
 * Fills GKernel with a normalised n x n Gaussian kernel (sigma = 10).
 *
 * The kernel is stored row-major: entry (i, j) lives at GKernel[i * n + j]
 * and corresponds to the offset (i - n/2, j - n/2) from the kernel centre.
 * For odd n the kernel is symmetric around its centre. For even n the taps
 * cover offsets -n/2 .. n/2 - 1, so exactly n * n entries are written and
 * the buffer is never overrun.
 *
 * @param GKernel  output buffer with room for at least n * n doubles
 * @param n        side length of the kernel; nothing is written when n <= 0
 *                 or GKernel is null
 */
void FilterCreation(double* GKernel, const int &n) {
    if (!GKernel || n <= 0)
        return;

    const int t = n / 2;
    const double sigma = 10.0;
    const double s = 2.0 * sigma * sigma;

    // Accumulated for normalisation. The usual 1 / (pi * s) prefactor is
    // omitted because it cancels out when dividing by the sum.
    double sum = 0.0;
    for (int i = 0; i < n; ++i) {
        const int x = i - t;
        for (int j = 0; j < n; ++j) {
            const int y = j - t;
            // Squared distance is used directly; no sqrt round trip needed.
            const double weight = std::exp(-static_cast<double>(x * x + y * y) / s);
            GKernel[i * n + j] = weight;
            sum += weight;
        }
    }

    // Normalise so that all weights add up to 1.
    for (int k = 0; k < n * n; ++k)
        GKernel[k] /= sum;
}

/**
 * Applies an n x n convolution kernel to one pixel of a planar image.
 *
 * Launch layout: grid = (h, w, channels) with one thread per block. The
 * image height and width are taken from gridDim.x and gridDim.y;
 * blockIdx.x is the pixel row, blockIdx.y the pixel column and blockIdx.z
 * the channel. If a block has more than one thread, every thread computes
 * and stores the same value.
 *
 * Memory layout of img and img_blured (device pointers, channels * h * w
 * ints each): index = channel * h * w + row * w + col.
 *
 * Pixels closer than n / 2 to any image border are left untouched in
 * img_blured. The accumulated value is truncated towards zero when stored.
 *
 * @param n           side length of the convolution kernel
 * @param img         input image
 * @param img_blured  output image
 * @param GKernel     n * n kernel weights, row-major (device pointer)
 */
__global__ void
blured(
        int n,
        int *img,
        int *img_blured,
        double *GKernel) {
    const int t = n / 2;
    const int h = static_cast<int>(gridDim.x);
    const int w = static_cast<int>(gridDim.y);
    const int row = static_cast<int>(blockIdx.x);
    const int col = static_cast<int>(blockIdx.y);

    // Skip the border where the kernel window would leave the image.
    if (row < t || row >= h - t || col < t || col >= w - t)
        return;

    // Offset of this block's channel plane inside the flat buffers.
    const int plane = static_cast<int>(blockIdx.z) * h * w;

    double pixel = 0.;
    // Iterate over kernel entries rather than -t..t offsets, so exactly
    // n * n weights are read even when n is even.
    for (int ki = 0; ki < n; ++ki) {
        const int srcRow = row + ki - t;
        for (int kj = 0; kj < n; ++kj) {
            const int srcCol = col + kj - t;
            pixel += img[plane + srcRow * w + srcCol] * GKernel[ki * n + kj];
        }
    }

    // Same row-major indexing as the reads above (row * w + col).
    img_blured[plane + row * w + col] = static_cast<int>(pixel);
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << "\n";
    return false;
}

/**
 * Reads a 3-channel image from "test.txt", blurs it on the GPU and prints
 * the result to stdout.
 *
 * Input format: height and width, followed by 3 * h * w integers stored
 * channel by channel, each channel row by row.
 * Output format: "h w" on the first line, then every row of every channel
 * on its own line, values separated (and followed) by a single space.
 *
 * Returns 0 on success and 1 on any I/O or CUDA failure.
 */
int main() {
    const int n = 15;  // side length of the Gaussian kernel
    std::vector<double> GKernel(n * n);
    FilterCreation(&GKernel[0], n);

    std::ifstream in("test.txt");
    if (!in) {
        std::cerr << "cannot open test.txt\n";
        return 1;
    }

    int h = 0, w = 0;
    if (!(in >> h >> w) || h <= 0 || w <= 0) {
        std::cerr << "invalid image dimensions in test.txt\n";
        return 1;
    }
    // The kernel indexes pixels with int, so 3 * h * w must fit in an int.
    const size_t maxPixels =
        static_cast<size_t>(std::numeric_limits<int>::max()) / 3;
    if (static_cast<size_t>(h) > maxPixels / static_cast<size_t>(w)) {
        std::cerr << "image too large\n";
        return 1;
    }

    const size_t N = 3 * static_cast<size_t>(h) * static_cast<size_t>(w);
    std::vector<int> img(N), img_blured(N);

    // Values are stored in file order: idx == k * h * w + i * w + j.
    for (size_t idx = 0; idx != N; ++idx) {
        if (!(in >> img[idx])) {
            std::cerr << "test.txt ended before all pixel values were read\n";
            return 1;
        }
    }
    in.close();

    const size_t imgBytes = N * sizeof(int);
    // Only n * n weights exist; sizing this by N would read past GKernel.
    const size_t kernelBytes = GKernel.size() * sizeof(double);

    int *d_img = 0, *d_img_blured = 0;
    double *d_GKernel = 0;

    // Short-circuit chain: the first failure skips every later step.
    bool ok =
        cudaOk(cudaMalloc(&d_img, imgBytes), "cudaMalloc(d_img)") &&
        cudaOk(cudaMalloc(&d_img_blured, imgBytes), "cudaMalloc(d_img_blured)") &&
        cudaOk(cudaMalloc(&d_GKernel, kernelBytes), "cudaMalloc(d_GKernel)") &&
        cudaOk(cudaMemcpy(d_img, &img[0], imgBytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(d_img)") &&
        // Border pixels are never written by the kernel, so start from zero.
        cudaOk(cudaMemset(d_img_blured, 0, imgBytes), "cudaMemset(d_img_blured)") &&
        cudaOk(cudaMemcpy(d_GKernel, &GKernel[0], kernelBytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(d_GKernel)");

    if (ok) {
        // One block per (row, column, channel); the kernel derives the
        // image size from the grid dimensions.
        dim3 grid(h, w, 3);
        dim3 block(1, 1, 1);
        blured<<<grid, block>>>(n, d_img, d_img_blured, d_GKernel);
        // Launch-configuration errors (e.g. grid too large) show up here;
        // the blocking copy below reports errors raised during execution.
        ok = cudaOk(cudaGetLastError(), "blured launch") &&
             cudaOk(cudaMemcpy(&img_blured[0], d_img_blured, imgBytes,
                               cudaMemcpyDeviceToHost),
                    "cudaMemcpy(img_blured)");
    }

    // cudaFree accepts null pointers, so this is safe on every path.
    cudaFree(d_img);
    cudaFree(d_img_blured);
    cudaFree(d_GKernel);

    if (!ok)
        return 1;

    std::cout << h << " " << w << "\n";
    size_t idx = 0;
    for (int k = 0; k != 3; ++k) {
        for (int i = 0; i != h; ++i) {
            for (int j = 0; j != w; ++j) {
                std::cout << img_blured[idx++] << " ";
            }
            std::cout << "\n";
        }
    }
    return 0;
}