Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2017
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.56 KB | None | 0 0
  1. // Histogram Equalization
  2.  
  #include <cstdio>
  #include <cstdlib>

  #include <wb.h>
  4.  
  5. #define BLOCK_SIZE 256
  6. #define BLOCK_WIDTH 16
  7.  
  8. //@@ insert code here
  9.  
  // Kernel 1: converts float samples to 8-bit values.
  //
  // Each thread handles one element: output[i] = 255 * input[i], truncated
  // toward zero. The scaled value is clamped to [0, 255] before narrowing so
  // that samples outside [0, 1] saturate instead of relying on an out-of-range
  // float -> unsigned char conversion.
  //
  // Parameters:
  //   input  - device array of `len` floats (read only)
  //   output - device array of `len` bytes (written)
  //   len    - number of elements to convert
  //
  // Launch layout: 1D grid of 1D blocks with at least `len` threads in total;
  // threads with a global index >= len do nothing.
  __global__
  void floatToChar(const float *input, unsigned char *output, int len){
       int i = blockIdx.x * blockDim.x + threadIdx.x;
       if(i < len){
            float scaled = 255.0f * input[i];
            // fmaxf returns the numeric argument when the other one is NaN,
            // so a NaN sample ends up as 0 rather than an undefined byte.
            scaled = fminf(fmaxf(scaled, 0.0f), 255.0f);
            output[i] = (unsigned char) scaled;
       }
  }
  17.  
  // Kernel 2: reduces an interleaved 3-channel 8-bit image to one gray byte
  // per pixel.
  //
  // Parameters:
  //   input  - device array holding 3 bytes (R, G, B) for each of `size` pixels
  //   output - device array of `size` gray bytes (written)
  //   size   - number of pixels
  //
  // Launch layout: 1D grid of 1D blocks, one thread per pixel; threads whose
  // global index is >= size exit immediately.
  __global__
  void rgbToGray(unsigned char *input, unsigned char *output, int size){
       const int pixel = blockDim.x * blockIdx.x + threadIdx.x;
       // Nothing to do for threads past the last pixel.
       if(pixel >= size)
            return;

       // The three channel bytes of a pixel sit next to each other.
       const unsigned char *rgb = input + pixel * 3;
       const unsigned char red = rgb[0];
       const unsigned char green = rgb[1];
       const unsigned char blue = rgb[2];

       // Weighted sum of the channels, truncated toward zero when stored.
       const float luminance = 0.21f*red + 0.71f*green + 0.07f*blue;
       output[pixel] = (unsigned char) luminance;
  }
  33.  
  // Kernel 3: accumulates a 256-bin histogram of 8-bit values.
  //
  // Each block builds a private histogram in shared memory and then adds it to
  // the global histogram with atomics. The kernel only ever ADDS to `output`,
  // so the caller must zero `output` before the launch.
  //
  // Parameters:
  //   input  - device array of `len` bytes (read only)
  //   output - device array of 256 counters, accumulated into
  //   len    - number of input bytes
  //
  // Launch layout: 1D grid of 1D blocks of any size. Input elements are
  // visited with a stride of blockDim.x * gridDim.x, and the shared bins are
  // cleared/merged with a stride of blockDim.x, so the result does not depend
  // on the block size being at least 256.
  // Shared memory: 256 * sizeof(unsigned int) bytes, statically allocated.
  __global__
  void pixelHisto(const unsigned char *input, unsigned int *output, int len){
       __shared__ unsigned int privHisto[256];

       // Clear every bin, even when the block has fewer than 256 threads.
       for(int bin = threadIdx.x; bin < 256; bin += blockDim.x)
            privHisto[bin] = 0;
       __syncthreads();

       int stride = blockDim.x * gridDim.x;
       for(int i = blockDim.x * blockIdx.x + threadIdx.x; i < len; i += stride)
            atomicAdd(&privHisto[input[i]], 1u);
       // All counts of this block must be in place before they are merged.
       __syncthreads();

       for(int bin = threadIdx.x; bin < 256; bin += blockDim.x){
            unsigned int count = privHisto[bin];
            // Skipping empty bins saves a global atomic without changing the sum.
            if(count != 0)
                 atomicAdd(&output[bin], count);
       }
  }
  51.  
  // Kernel 4: remaps every 8-bit sample through the cumulative distribution
  // function and writes the result as a float in [0, 1].
  //
  // For a sample value v the output is
  //     clamp(255 * (cdf[v] - cdf[0]) / (1 - cdf[0]), 0, 255)
  // truncated to an integer level and divided by 255.
  //
  // Parameters:
  //   input  - device array of `len` bytes (read only)
  //   output - device array of `len` floats (written)
  //   cdf    - device array indexed by sample value, so it needs 256 entries
  //   len    - number of samples
  //
  // Launch layout: 1D grid of 1D blocks with at least `len` threads in total.
  // The bounds check comes first so that surplus threads never read `input`.
  __global__
  void equalizeImage(const unsigned char *input, float *output, const float *cdf, int len){
       int i = blockIdx.x * blockDim.x + threadIdx.x;
       if(i >= len)
            return;

       unsigned char level = input[i];
       float cdfMin = cdf[0];
       float range = 1.0f - cdfMin;

       // cdf[0] == 1 would make the formula 0/0; in that case there is
       // nothing to stretch, so the sample is passed through unchanged.
       if(range <= 0.0f){
            output[i] = level / 255.0f;
            return;
       }

       float ans = 255.0f * (cdf[level] - cdfMin) / range;
       // Clamp before narrowing so the byte conversion is always in range.
       ans = fminf(fmaxf(ans, 0.0f), 255.0f);
       output[i] = (float) ((unsigned char) ans) / 255.0f;
  }
  62.  
  63.  
  // Terminates the program with a diagnostic when a CUDA runtime call failed.
  static void checkCuda(cudaError_t status, const char *what){
    if(status != cudaSuccess){
      fprintf(stderr, "CUDA error while %s: %s\n", what, cudaGetErrorString(status));
      exit(EXIT_FAILURE);
    }
  }

  // Number of blocks of `blockSize` threads needed to cover `count` items
  // (integer ceiling division, exact for every int count).
  static unsigned int blocksFor(int count, int blockSize){
    return (unsigned int) (count / blockSize + (count % blockSize != 0 ? 1 : 0));
  }

  // Histogram-equalizes the image named by the first input argument.
  //
  // Pipeline (all kernels run on the default stream, in order):
  //   1. floatToChar   - float samples            -> 8-bit samples
  //   2. rgbToGray     - 8-bit RGB                -> 8-bit gray
  //   3. pixelHisto    - gray image               -> 256-bin histogram
  //   4. host          - histogram                -> cumulative distribution
  //   5. equalizeImage - 8-bit samples + CDF      -> equalized float samples
  // The equalized samples overwrite deviceInput, are copied to the host and
  // handed to wbSolution.
  //
  // Returns 0 on success; exits with a failure status on a CUDA error, on a
  // host allocation failure, or when the image is empty or not 3-channel.
  int main(int argc, char **argv) {
    // Number of histogram bins; the kernels hard-code 256 gray levels.
    const int histogramBins = 256;

    wbArg_t args;
    int imageWidth;
    int imageHeight;
    int imageChannels;
    wbImage_t inputImage;
    wbImage_t outputImage;
    float *hostInput;
    float *hostOutput;
    float *deviceInput;
    float *cdf;
    float *deviceCDF;
    unsigned char *grayDeviceInput;
    unsigned char *charDeviceInput;
    unsigned int *deviceHisto;
    unsigned int *hostHisto;
    const char *inputImageFile;

    args = wbArg_read(argc, argv); /* parse the input arguments */

    inputImageFile = wbArg_getInputFile(args, 0);

    wbTime_start(Generic, "Importing data and creating memory on host");
    inputImage = wbImport(inputImageFile);
    imageWidth = wbImage_getWidth(inputImage);
    imageHeight = wbImage_getHeight(inputImage);
    imageChannels = wbImage_getChannels(inputImage);
    outputImage = wbImage_new(imageWidth, imageHeight, imageChannels);
    wbTime_stop(Generic, "Importing data and creating memory on host");

    // Pixel count, and total sample count including all channels.
    int imageSize = imageHeight * imageWidth;
    int rgbImageSize = imageSize * imageChannels;

    // rgbToGray reads exactly three bytes per pixel, and an empty image would
    // mean a zero-block launch and a division by zero in the CDF below.
    if(imageSize <= 0 || imageChannels != 3){
      fprintf(stderr, "Expected a non-empty 3-channel image, got %dx%d with %d channel(s)\n",
              imageWidth, imageHeight, imageChannels);
      return EXIT_FAILURE;
    }

    // Float samples of the input image.
    hostInput = wbImage_getData(inputImage);

    // Host buffers.
    hostHisto = (unsigned int *) malloc(histogramBins * sizeof(unsigned int));
    cdf = (float *) malloc(histogramBins * sizeof(float));
    hostOutput = (float *) malloc(rgbImageSize * sizeof(float));
    if(hostHisto == NULL || cdf == NULL || hostOutput == NULL){
      fprintf(stderr, "Out of host memory\n");
      return EXIT_FAILURE;
    }

    // Device buffers.
    checkCuda(cudaMalloc((void **) &deviceInput, rgbImageSize * sizeof(float)),
              "allocating deviceInput");
    checkCuda(cudaMalloc((void **) &grayDeviceInput, imageSize * sizeof(unsigned char)),
              "allocating grayDeviceInput");
    checkCuda(cudaMalloc((void **) &charDeviceInput, rgbImageSize * sizeof(unsigned char)),
              "allocating charDeviceInput");
    checkCuda(cudaMalloc((void **) &deviceHisto, histogramBins * sizeof(unsigned int)),
              "allocating deviceHisto");
    checkCuda(cudaMalloc((void **) &deviceCDF, histogramBins * sizeof(float)),
              "allocating deviceCDF");

    // cudaMalloc does not clear memory and pixelHisto only adds to the bins,
    // so the histogram has to start from zero.
    checkCuda(cudaMemset(deviceHisto, 0, histogramBins * sizeof(unsigned int)),
              "clearing deviceHisto");

    checkCuda(cudaMemcpy(deviceInput, hostInput, rgbImageSize * sizeof(float),
                         cudaMemcpyHostToDevice),
              "copying the input image to the device");

    // One thread per sample for the per-sample kernels, one thread per pixel
    // for the per-pixel kernels.
    dim3 dimBlock(BLOCK_SIZE);
    dim3 dimGridSamples(blocksFor(rgbImageSize, BLOCK_SIZE));
    dim3 dimGridPixels(blocksFor(imageSize, BLOCK_SIZE));

    // Step 1: float -> unsigned char, result in charDeviceInput.
    floatToChar <<<dimGridSamples, dimBlock>>> (deviceInput, charDeviceInput, rgbImageSize);
    checkCuda(cudaGetLastError(), "launching floatToChar");

    // Step 2: RGB -> gray, result in grayDeviceInput.
    rgbToGray <<<dimGridPixels, dimBlock>>> (charDeviceInput, grayDeviceInput, imageSize);
    checkCuda(cudaGetLastError(), "launching rgbToGray");

    // Step 3: histogram of the gray image.
    pixelHisto <<<dimGridPixels, dimBlock>>> (grayDeviceInput, deviceHisto, imageSize);
    checkCuda(cudaGetLastError(), "launching pixelHisto");

    // Step 4: cumulative distribution on the host. The blocking copy also
    // waits for the kernels above and reports any error they raised.
    checkCuda(cudaMemcpy(hostHisto, deviceHisto, histogramBins * sizeof(unsigned int),
                         cudaMemcpyDeviceToHost),
              "copying the histogram to the host");
    cdf[0] = (float) hostHisto[0]/imageSize;
    for(int i = 1; i < histogramBins; ++i){
      cdf[i] = cdf[i-1] + (float) hostHisto[i]/imageSize;
    }
    checkCuda(cudaMemcpy(deviceCDF, cdf, histogramBins * sizeof(float),
                         cudaMemcpyHostToDevice),
              "copying the CDF to the device");

    // Step 5: equalize every sample; deviceInput is reused as the output.
    equalizeImage <<<dimGridSamples, dimBlock>>> (charDeviceInput, deviceInput, deviceCDF, rgbImageSize);
    checkCuda(cudaGetLastError(), "launching equalizeImage");

    checkCuda(cudaMemcpy(hostOutput, deviceInput, rgbImageSize * sizeof(float),
                         cudaMemcpyDeviceToHost),
              "copying the result to the host");
    wbImage_setData(outputImage, hostOutput);
    wbSolution(args, outputImage);

    // Release host and device buffers. outputImage is not used after
    // wbSolution, so hostOutput can be released here as well.
    free(hostHisto);
    free(cdf);
    free(hostOutput);
    checkCuda(cudaFree(deviceInput), "freeing deviceInput");
    checkCuda(cudaFree(grayDeviceInput), "freeing grayDeviceInput");
    checkCuda(cudaFree(charDeviceInput), "freeing charDeviceInput");
    checkCuda(cudaFree(deviceHisto), "freeing deviceHisto");
    checkCuda(cudaFree(deviceCDF), "freeing deviceCDF");

    return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement