Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Histogram Equalization
#include <cstdio>
#include <cstdlib>

#include <wb.h>
- #define BLOCK_SIZE 256
- #define BLOCK_WIDTH 16
- //@@ insert code here
/**
 * Kernel 1: quantizes a float buffer to 8-bit values.
 *
 * Each thread handles at most one element: output[idx] receives
 * 255 * input[idx] truncated to unsigned char. The global index is derived
 * from the x dimension of the launch only; threads whose index is >= len
 * do nothing.
 *
 * NOTE(review): presumably input holds intensities normalized to [0, 1];
 * out-of-range values are not clamped here -- confirm against the caller.
 *
 * @param input   source buffer with at least `len` floats
 * @param output  destination buffer with at least `len` bytes
 * @param len     number of elements to convert
 */
__global__
void floatToChar(float *input, unsigned char *output, int len){
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;
    if(idx >= len)
        return;

    // Scale first, then truncate the product to 8 bits.
    const float scaled = 255 * input[idx];
    output[idx] = static_cast<unsigned char>(scaled);
}
/**
 * Kernel 2: collapses interleaved 3-channel 8-bit pixels to one gray value.
 *
 * Thread `pixel` reads the three consecutive bytes starting at
 * input[pixel * 3] (taken as R, G, B in that order) and writes
 * 0.21*R + 0.71*G + 0.07*B, truncated to unsigned char, to output[pixel].
 * The global index uses the x dimension of the launch only; threads whose
 * index is >= size do nothing.
 *
 * @param input   interleaved channel data, at least 3 * size bytes
 * @param output  gray-scale result, at least `size` bytes
 * @param size    number of pixels (not bytes) to convert
 */
__global__
void rgbToGray(unsigned char *input, unsigned char *output, int size){
    const int pixel = threadIdx.x + blockIdx.x * blockDim.x;
    if(pixel >= size)
        return;

    // Three channels are stored back to back for every pixel.
    const unsigned char *rgb = input + pixel * 3;
    const unsigned char red   = rgb[0];
    const unsigned char green = rgb[1];
    const unsigned char blue  = rgb[2];

    // Weighted sum of the channels gives the gray intensity.
    const float luminance = 0.21f*red + 0.71f*green + 0.07f*blue;
    output[pixel] = static_cast<unsigned char>(luminance);
}
/**
 * Kernel 3: accumulates a 256-bin histogram of 8-bit values.
 *
 * Every block builds a private histogram in shared memory (256 unsigned
 * ints of static shared memory), walks the input with a stride of
 * blockDim.x * gridDim.x, and finally adds its private counts to `output`
 * with global atomics.
 *
 * The bin initialisation and the final flush iterate over the bins with a
 * stride of blockDim.x, so the kernel is correct for any 1D block size,
 * not only for blocks of at least 256 threads.
 *
 * The kernel only ever adds to `output`; the caller must zero all 256
 * entries before the launch.
 *
 * @param input   values to count, at least `len` bytes
 * @param output  256-entry histogram that is accumulated into
 * @param len     number of input elements
 */
__global__
void pixelHisto(unsigned char *input, unsigned int *output, int len){
    const int kBins = 256;
    __shared__ unsigned int privHisto[kBins];

    // Clear the block-private histogram, whatever the block size is.
    for(int bin = threadIdx.x; bin < kBins; bin += blockDim.x)
        privHisto[bin] = 0;
    __syncthreads();

    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int stride = blockDim.x * gridDim.x;
    while(i < len){
        atomicAdd(&privHisto[input[i]], 1u);
        i += stride;
    }
    // All private counts must be final before any thread publishes them.
    __syncthreads();

    for(int bin = threadIdx.x; bin < kBins; bin += blockDim.x){
        const unsigned int count = privHisto[bin];
        // Skip empty bins to avoid pointless global atomics.
        if(count != 0)
            atomicAdd(&output[bin], count);
    }
}
/**
 * Kernel 4: applies the histogram-equalization mapping to 8-bit values.
 *
 * For element i with value v = input[i] the kernel computes
 *     255 * (cdf[v] - cdf[0]) / (1 - cdf[0]),
 * clamps it to [0, 255], truncates it to an 8-bit value and stores that
 * value divided by 255 in output[i].
 *
 * The global index uses the x dimension of the launch only. Threads whose
 * index is >= len return before touching any buffer, so no element past
 * `len` is read or written.
 *
 * If 1 - cdf[0] is not positive the mapping is undefined (division by
 * zero); in that case the value is passed through unchanged.
 *
 * @param input   8-bit values to remap, at least `len` bytes
 * @param output  remapped values scaled to [0, 1], at least `len` floats
 * @param cdf     256-entry cumulative distribution, indexed by 8-bit value
 * @param len     number of elements to process
 */
__global__
void equalizeImage(unsigned char *input, float *output, float *cdf, int len){
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    // Bounds check comes first: tail threads must not read input/cdf.
    if(i >= len)
        return;

    const unsigned char value = input[i];
    const float cdfMin = cdf[0];
    const float range = 1.0f - cdfMin;

    float corrected;
    if(range > 0.0f){
        corrected = 255.0f * (cdf[value] - cdfMin) / range;
        // Clamp before the narrowing cast; an out-of-range float to
        // unsigned char conversion is undefined.
        corrected = fminf(fmaxf(corrected, 0.0f), 255.0f);
    } else {
        // Degenerate distribution: keep the original intensity.
        corrected = (float) value;
    }

    output[i] = (float) ((unsigned char) corrected) / 255.0f;
}
// Aborts the program with a diagnostic when a CUDA runtime call fails.
#define HISTO_CUDA_CHECK(call)                                              \
    do {                                                                    \
        cudaError_t histoCudaErr_ = (call);                                 \
        if (histoCudaErr_ != cudaSuccess) {                                 \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(histoCudaErr_));           \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/**
 * Host driver for the histogram-equalization pipeline.
 *
 * Steps:
 *   1. import the input image and upload its float data to the device;
 *   2. floatToChar: convert every channel value to 8 bits;
 *   3. rgbToGray: build an 8-bit gray image from the 3-channel data;
 *   4. pixelHisto: build the 256-bin histogram of the gray image;
 *   5. compute the cumulative distribution (CDF) on the host;
 *   6. equalizeImage: remap every channel value through the CDF, writing
 *      floats back into the device input buffer;
 *   7. download the result and hand it to wbSolution.
 *
 * Returns 0 on success and EXIT_FAILURE when the image is empty, does not
 * have 3 channels, or a host allocation fails. Any CUDA runtime error
 * terminates the process through HISTO_CUDA_CHECK.
 */
int main(int argc, char **argv) {
    // Number of 8-bit intensity levels; matches the 256 bins that
    // pixelHisto allocates in shared memory.
    const int histogramBins = 256;

    wbArg_t args;
    int imageWidth;
    int imageHeight;
    int imageChannels;
    wbImage_t inputImage;
    wbImage_t outputImage;
    float *hostInput;
    float *hostOutput;
    float *deviceInput;
    float *cdf;
    float *deviceCDF;
    unsigned char *grayDeviceInput;
    unsigned char *charDeviceInput;
    unsigned int *deviceHisto;
    unsigned int *hostHisto;
    const char *inputImageFile;

    args = wbArg_read(argc, argv); /* parse the input arguments */
    inputImageFile = wbArg_getInputFile(args, 0);

    wbTime_start(Generic, "Importing data and creating memory on host");
    inputImage = wbImport(inputImageFile);
    imageWidth = wbImage_getWidth(inputImage);
    imageHeight = wbImage_getHeight(inputImage);
    imageChannels = wbImage_getChannels(inputImage);
    outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);
    wbTime_stop(Generic, "Importing data and creating memory on host");

    // A zero-sized image would produce zero-block launches and a division
    // by zero in the CDF computation below.
    if (imageWidth <= 0 || imageHeight <= 0) {
        fprintf(stderr, "Input image is empty (%d x %d)\n", imageWidth, imageHeight);
        return EXIT_FAILURE;
    }
    // rgbToGray reads exactly three bytes per pixel.
    if (imageChannels != 3) {
        fprintf(stderr, "Expected a 3-channel image, got %d channels\n", imageChannels);
        return EXIT_FAILURE;
    }

    // Pixel count (without channels) and total number of channel values.
    const int imageSize = imageHeight * imageWidth;
    const int rgbImageSize = imageSize * imageChannels;

    hostInput = wbImage_getData(inputImage);

    // Host buffers.
    hostHisto = (unsigned int *) malloc(histogramBins * sizeof(unsigned int));
    cdf = (float *) malloc(histogramBins * sizeof(float));
    hostOutput = (float *) malloc(rgbImageSize * sizeof(float));
    if (hostHisto == NULL || cdf == NULL || hostOutput == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        free(hostHisto);
        free(cdf);
        free(hostOutput);
        return EXIT_FAILURE;
    }

    // Device buffers.
    HISTO_CUDA_CHECK(cudaMalloc((void **) &deviceInput, rgbImageSize * sizeof(float)));
    HISTO_CUDA_CHECK(cudaMalloc((void **) &grayDeviceInput, imageSize * sizeof(unsigned char)));
    HISTO_CUDA_CHECK(cudaMalloc((void **) &charDeviceInput, rgbImageSize * sizeof(unsigned char)));
    HISTO_CUDA_CHECK(cudaMalloc((void **) &deviceHisto, histogramBins * sizeof(unsigned int)));
    HISTO_CUDA_CHECK(cudaMalloc((void **) &deviceCDF, histogramBins * sizeof(float)));

    HISTO_CUDA_CHECK(cudaMemcpy(deviceInput, hostInput, rgbImageSize * sizeof(float),
                                cudaMemcpyHostToDevice));
    // pixelHisto only accumulates into the histogram, so it must start at zero.
    HISTO_CUDA_CHECK(cudaMemset(deviceHisto, 0, histogramBins * sizeof(unsigned int)));

    // 1D launch configurations (integer ceil-division).
    const int channelBlocks = (rgbImageSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
    const int pixelBlocks = (imageSize + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Kernel 1: float channel values -> unsigned char.
    floatToChar <<<channelBlocks, BLOCK_SIZE>>> (deviceInput, charDeviceInput, rgbImageSize);
    HISTO_CUDA_CHECK(cudaGetLastError());

    // Kernel 2: 3-channel 8-bit image -> gray-scale image.
    rgbToGray <<<pixelBlocks, BLOCK_SIZE>>> (charDeviceInput, grayDeviceInput, imageSize);
    HISTO_CUDA_CHECK(cudaGetLastError());

    // Kernel 3: histogram of the gray-scale image.
    pixelHisto <<<pixelBlocks, BLOCK_SIZE>>> (grayDeviceInput, deviceHisto, imageSize);
    HISTO_CUDA_CHECK(cudaGetLastError());

    // The blocking copy also waits for the kernels above to finish.
    HISTO_CUDA_CHECK(cudaMemcpy(hostHisto, deviceHisto, histogramBins * sizeof(unsigned int),
                                cudaMemcpyDeviceToHost));

    // Cumulative distribution of the gray levels, computed on the host.
    cdf[0] = (float) hostHisto[0] / imageSize;
    for (int level = 1; level < histogramBins; ++level) {
        cdf[level] = cdf[level - 1] + (float) hostHisto[level] / imageSize;
    }
    HISTO_CUDA_CHECK(cudaMemcpy(deviceCDF, cdf, histogramBins * sizeof(float),
                                cudaMemcpyHostToDevice));

    // Kernel 4: remap every channel value; deviceInput is reused as output.
    equalizeImage <<<channelBlocks, BLOCK_SIZE>>> (charDeviceInput, deviceInput, deviceCDF, rgbImageSize);
    HISTO_CUDA_CHECK(cudaGetLastError());

    HISTO_CUDA_CHECK(cudaMemcpy(hostOutput, deviceInput, rgbImageSize * sizeof(float),
                                cudaMemcpyDeviceToHost));

    wbImage_setData(outputImage, hostOutput);
    wbSolution(args, outputImage);

    // NOTE(review): hostOutput is not freed here because it was handed to
    // outputImage via wbImage_setData; whether the image takes ownership is
    // not visible in this file -- TODO confirm.
    free(hostHisto);
    free(cdf);
    HISTO_CUDA_CHECK(cudaFree(deviceInput));
    HISTO_CUDA_CHECK(cudaFree(grayDeviceInput));
    HISTO_CUDA_CHECK(cudaFree(charDeviceInput));
    HISTO_CUDA_CHECK(cudaFree(deviceHisto));
    HISTO_CUDA_CHECK(cudaFree(deviceCDF));
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement