daily pastebin goal
66%
SHARE
TWEET

Untitled

a guest Mar 20th, 2019 60 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6. #include <stdlib.h>
  7. #include <string.h>
  8. #include <iostream>
  9.  
  // Number of samples in the test signal.
  #define N 1000
  // Half-width of the averaging window; the window covers 2*RADIOUS + 1 samples.
  #define RADIOUS 1

  // Checks the CUDA runtime's last-error state after a kernel launch or API call.
  // On failure it prints the file, line and the runtime's error string to stderr
  // and terminates with a non-zero exit status so the failure is visible to the
  // caller of the program.
  // The do { } while (0) wrapper makes the macro behave as one statement, so
  // `cudaCheckError();` is safe inside unbraced if/else bodies.
  #define cudaCheckError() do {                                          \
      cudaError_t e = cudaGetLastError();                                \
      if (e != cudaSuccess) {                                            \
          fprintf(stderr, "Cuda failure %s:%d: '%s'\n",                  \
                  __FILE__, __LINE__, cudaGetErrorString(e));            \
          exit(EXIT_FAILURE);                                            \
      }                                                                  \
  } while (0)
  22.  
  /**
   * Allocates a host array of `size` floats and fills it with pseudo-random values.
   *
   * The generator is reseeded with 0 on every call, so repeated calls produce
   * the same sequence.
   *
   * @param data  Out-parameter; receives the array allocated with new[].
   * @param size  Number of elements to allocate and fill.
   */
  void setupData(float** data, int size)
  {
      float* buffer = new float[size];
      memset(buffer, 0, sizeof(float) * size);
      *data = buffer;

      srand(0x00);
      for (float* cursor = buffer; cursor != buffer + size; ++cursor)
      {
          // The int returned by rand() is converted to float on assignment.
          *cursor = rand();
      }
  }
  34.  
  /**
   * CPU reference implementation of a 1-D moving-average filter.
   *
   * For every i with radious <= i < size - radious, the output is the mean of
   * the 2*radious + 1 input samples centred on i. The first and last `radious`
   * outputs are left at zero.
   *
   * @param dataIn   Input samples (size elements).
   * @param dataOut  Out-parameter; receives an array of size elements allocated
   *                 with new[].
   * @param size     Number of samples.
   * @param radious  Half-width of the averaging window.
   */
  void filtroCPU(float* dataIn, float** dataOut, int size, int radious)
  {
      float* result = new float[size];
      memset(result, 0, sizeof(float) * size);
      *dataOut = result;

      const int window = radious * 2 + 1;
      const int lastExclusive = size - radious;

      for (int center = radious; center < lastExclusive; ++center)
      {
          // Visit the window from the rightmost sample down to the leftmost one,
          // which keeps the floating-point summation order unchanged.
          float acc = 0.0f;
          for (int k = center + radious; k >= center - radious; --k)
          {
              acc += dataIn[k];
          }
          result[center] = acc / window;
      }
  }
  48.  
  49.  
  /**
   * Allocates a device buffer and uploads `size` floats from host memory into it.
   *
   * Errors are handled through cudaCheckError(), which is invoked after the
   * allocation and again after the copy.
   *
   * @param h_data  Host source array (size elements).
   * @param d_data  Out-parameter; receives the device pointer from cudaMalloc.
   *                The caller releases it with cudaFree.
   * @param size    Number of float elements to copy.
   */
  void copyToGPU(float* h_data, float** d_data, int size)
  {
      const size_t bytes = sizeof(float) * size;

      *d_data = NULL;
      cudaMalloc((void**)d_data, bytes);
      // Check right away: a failed allocation must not reach the copy below.
      cudaCheckError();

      // No cudaMemset is needed; the copy overwrites the whole buffer.
      cudaMemcpy(*d_data, h_data, bytes, cudaMemcpyHostToDevice);
      cudaCheckError();
  }
  57.  
  /**
   * Allocates a host array and downloads `size` floats from device memory into it.
   *
   * @param h_data  Out-parameter; receives a host array of size elements
   *                allocated with new[].
   * @param d_data  Device source buffer (size elements).
   * @param size    Number of float elements to copy.
   */
  void copyFromGPU(float** h_data, float* d_data, int size)
  {
      const size_t bytes = sizeof(float) * size;

      *h_data = new float[size];
      // The source is device memory and the destination is host memory, so the
      // transfer kind must be device-to-host.
      cudaMemcpy(*h_data, d_data, bytes, cudaMemcpyDeviceToHost);
      cudaCheckError();
  }
  65.  
  66.  
  /**
   * Moving-average kernel: each thread computes one output element.
   *
   * Indexing is one-dimensional (threadIdx.x, blockIdx.x, blockDim.x). Threads
   * whose global index is >= size return immediately, so the grid may cover
   * more threads than there are elements.
   *
   * Precondition: the loop reads dataIn[index - j] for j in [-radious, radious],
   * so dataIn must be valid for `radious` elements before index 0 and after
   * index size - 1.
   *
   * @param dataIn   Device input samples.
   * @param dataOut  Device output; element `index` receives the window mean.
   * @param size     Number of output elements to produce.
   * @param radious  Half-width of the averaging window.
   */
  __global__ void filtroKernel(float* dataIn, float* dataOut, int size, int radious)
  {
      int index = threadIdx.x + blockIdx.x * blockDim.x;

      if (index >= size) return;

      // Accumulate locally and store once, so the result does not depend on
      // the previous contents of dataOut.
      float acc = 0.0f;
      for (int j = -radious; j <= radious; j++)
      {
          acc += dataIn[index - j];
      }
      dataOut[index] = acc / (radious * 2 + 1);
  }
  /**
   * Runs the moving-average filter on the GPU and returns the result in a new
   * host array.
   *
   * Steps: upload the input, allocate a zeroed device output buffer, launch
   * filtroKernel over the interior elements, download the result, free the
   * device buffers.
   *
   * @param dataIn   Host input samples (size elements).
   * @param dataOut  Out-parameter; receives the host result array produced by
   *                 copyFromGPU.
   * @param size     Number of samples.
   * @param radious  Half-width of the averaging window.
   */
  void filtroGPU(float* dataIn, float** dataOut, int size, int radious)
  {
      float* d_dataIn = NULL;
      float* d_dataOut = NULL;

      // Upload the input and allocate the device output buffer.
      copyToGPU(dataIn, &d_dataIn, size);
      cudaMalloc((void**)&d_dataOut, size * sizeof(float));
      cudaCheckError();
      // The kernel is launched only over the interior, so the zero fill is what
      // gives the first and last `radious` outputs a defined value.
      cudaMemset(d_dataOut, 0, sizeof(float) * size);
      cudaCheckError();

      // Only elements with a full window on both sides are computed.
      const int interior = size - (radious * 2);
      if (interior > 0)
      {
          const int numthreadporbloque = 256;
          // Ceil-division: just enough blocks to cover the interior elements.
          const int numbloques = (interior + numthreadporbloque - 1) / numthreadporbloque;

          // Both pointers are advanced by `radious` so that kernel index 0 maps
          // to the first element that has a complete window.
          filtroKernel<<<numbloques, numthreadporbloque>>>(
              d_dataIn + radious, d_dataOut + radious, interior, radious);
          cudaCheckError();   // launch-configuration errors
          cudaDeviceSynchronize();
          cudaCheckError();   // errors raised while the kernel was running
      }

      // Download the result to the host.
      copyFromGPU(dataOut, d_dataOut, size);

      // Release device memory.
      cudaFree(d_dataOut);
      cudaFree(d_dataIn);
      cudaCheckError();
  }
  110.  
  /**
   * Compares two float arrays element by element within a tolerance.
   *
   * Two elements match when they are exactly equal, or when
   * |d1[i] - d2[i]| <= atol + rtol * |d2[i]|. The first mismatch is printed to
   * std::cout and ends the comparison.
   *
   * @param d1    First array (size elements).
   * @param d2    Second array (size elements); used as the reference for the
   *              relative tolerance.
   * @param size  Number of elements to compare.
   * @param rtol  Relative tolerance.
   * @param atol  Absolute tolerance.
   * @return 1 if every element matches, 0 at the first mismatch.
   */
  int corrector(float* d1, float* d2, int size, float rtol = 1e-5f, float atol = 1e-6f)
  {
      for (int i = 0; i < size; i++)
      {
          // Exact equality short-circuits; it also accepts equal infinities,
          // whose difference would otherwise be NaN.
          if (d1[i] == d2[i]) continue;

          float diff = d1[i] - d2[i];
          if (diff < 0.0f) diff = -diff;
          const float ref = (d2[i] < 0.0f) ? -d2[i] : d2[i];

          // Written as !(diff <= tol) so a NaN on either side is a mismatch.
          if (!(diff <= atol + rtol * ref))
          {
              std::cout << "Error en elemento " << i << " " << d1[i] << " " << d2[i] << "\n";
              return 0;
          }
      }
      return 1;
  }
  123.  
  /**
   * Entry point: builds the test signal, filters it on the CPU and on the GPU,
   * and compares the two results.
   *
   * @return 0 when corrector() reports a match, 1 otherwise.
   */
  int main(int argc, char** argv)
  {
      float* dataIn = NULL;
      float* dataOutCPU = NULL;
      float* dataOutGPU = NULL;

      setupData(&dataIn, N);
      filtroCPU(dataIn, &dataOutCPU, N, RADIOUS);
      filtroGPU(dataIn, &dataOutGPU, N, RADIOUS);

      const int ok = corrector(dataOutCPU, dataOutGPU, N);
      if (!ok)
          std::cout << "Error\n";

      // All three buffers were allocated with new[], so they must be released
      // with delete[]; plain delete on them is undefined behaviour.
      delete[] dataOutCPU;
      delete[] dataOutGPU;
      delete[] dataIn;

      return ok ? 0 : 1;
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top