Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Host-side driver: uploads a single-channel 16-bit frame to the GPU, runs
// bayerRG to expand it to three interleaved channels, downloads the result
// and wraps it in a cv::Mat.
//
// NOTE(review): none of the CUDA calls below have their return codes checked,
// and the launch is not followed by cudaGetLastError(); a failure in any step
// would go unnoticed here. Add checks that fit the enclosing function.

// Number of input samples, assuming two bytes per sample (ushort) - TODO confirm.
pixels = fileSizeBytes / 2;
heightPx = 3040;
widthPx = 4096;
// NOTE(review): the copies are sized by `pixels` while the kernel indexes by
// widthPx * heightPx; the two must agree or the kernel touches memory outside
// the uploaded frame / the allocation - confirm against the file format.

// One allocation holds both buffers: `pixels` input samples followed by
// 3 * `pixels` output samples.
cudaMalloc(&d_inp, pixels * 4 * sizeof(ushort));
d_img = d_inp + pixels;  // output region starts right after the input region
cudaMemcpy(d_inp, h_img, pixels * sizeof(ushort), cudaMemcpyHostToDevice);

// `block` is the grid size (in blocks) and `threads` is the per-block size.
// The integer division assumes both dimensions are multiples of 16
// (4096 / 16 = 256, 3040 / 16 = 190).
dim3 block(widthPx / 16, heightPx / 16);
dim3 threads(16, 16);
bayerRG<<<block, threads>>>(d_img, d_inp, widthPx, heightPx);

// Blocking copy of the 3-channel result back to host memory.
cudaMemcpy(gpu_output, d_img, pixels * 3 * sizeof(ushort), cudaMemcpyDeviceToHost);

// Build the matrix header from the same dimensions used for the launch
// instead of repeating the literals, so the two cannot drift apart.
cv::Mat outputMat_16UC3CUDA = cv::Mat(static_cast<int>(heightPx),
                                      static_cast<int>(widthPx),
                                      CV_16UC3, gpu_output);
/**
 * Expands a single-channel image into a three-channel interleaved image by
 * copying every input sample into all three channels of the matching output
 * pixel. No colour interpolation is performed.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; the x index
 * selects the column and the y index selects the row. Threads that fall
 * outside width x height return without touching memory, so the grid may be
 * rounded up past the image size.
 *
 * @param d_img   output buffer, width * height * 3 elements, channels interleaved
 * @param d_inp   input buffer, width * height elements, row-major
 * @param width   image width in pixels
 * @param height  image height in pixels
 */
__global__ void bayerRG(ushort *d_img, ushort *d_inp, uint width, uint height)
{
    const uint x = blockIdx.x * blockDim.x + threadIdx.x;
    const uint y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against grids that overshoot the image; without it those threads
    // would read and write out of bounds.
    if (x >= width || y >= height)
        return;

    // Index in size_t so large images cannot wrap 32-bit arithmetic.
    const size_t inp_i = static_cast<size_t>(y) * width + x;  // 1 channel in input
    const size_t img_i = inp_i * 3;                           // 3 channels in image

    // Load the sample once and replicate it into the three channels.
    const ushort sample = d_inp[inp_i];
    d_img[img_i]     = sample;
    d_img[img_i + 1] = sample;
    d_img[img_i + 2] = sample;
}
Add Comment
Please, Sign In to add comment