Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <cmath>
#include <iostream>
// Number of elements main() passes as the array size to every stage.
#define N 1000
// Filter radius main() passes to both filters; each output averages
// 2*RADIOUS+1 neighbouring inputs.
#define RADIOUS 1

// Checks the CUDA runtime's last-error state after a kernel launch or API call.
// On failure it prints the file, line and CUDA error string to stderr and
// terminates the process with a non-zero exit status.
// The do { ... } while (0) wrapper makes the macro behave as a single statement,
// so `if (x) cudaCheckError(); else ...` parses correctly.
#define cudaCheckError()                                                      \
    do {                                                                      \
        cudaError_t lastErr_ = cudaGetLastError();                            \
        if (lastErr_ != cudaSuccess) {                                        \
            fprintf(stderr, "Cuda failure %s:%d: '%s'\n", __FILE__, __LINE__, \
                    cudaGetErrorString(lastErr_));                            \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)
// Allocates a host array of `size` floats and fills it with pseudo-random values.
//
// data: out-parameter; receives the pointer to the array allocated with new[].
// size: number of elements to allocate and fill.
//
// The generator is reseeded with 0 on every call, so each call produces the
// same sequence of values.
void setupData(float** data, int size)
{
    // Value-initialisation zeroes the buffer before it is filled.
    float* samples = new float[size]();
    *data = samples;

    srand(0x00);
    for (float* cursor = samples; cursor < samples + size; ++cursor)
    {
        *cursor = static_cast<float>(rand());
    }
}
// CPU reference implementation of a 1-D moving-average filter.
//
// dataIn:  input signal holding `size` floats.
// dataOut: out-parameter; receives a new[]-allocated array of `size` floats.
// size:    number of elements in dataIn.
// radious: half-width of the window; each output averages 2*radious+1 inputs.
//
// Only indices in [radious, size - radious) are filtered; border elements stay 0.
void filtroCPU(float* dataIn, float** dataOut, int size, int radious)
{
    float* filtered = new float[size]();
    *dataOut = filtered;

    int center = radious;
    while (center < size - radious)
    {
        // Walk the window from its right edge down to its left edge so the
        // floating-point additions happen in a fixed, reproducible order.
        float acc = 0.0f;
        for (int k = center + radious; k >= center - radious; --k)
        {
            acc += dataIn[k];
        }
        filtered[center] = acc / (radious * 2 + 1);
        ++center;
    }
}
// Allocates a device buffer and uploads a host array into it.
//
// h_data: host array holding at least `size` floats.
// d_data: out-parameter; receives the device pointer returned by cudaMalloc.
//         The caller is responsible for releasing it with cudaFree.
// size:   number of float elements to allocate and copy.
//
// Failures are reported through cudaCheckError(), which terminates the program.
void copyToGPU(float* h_data, float** d_data, int size)
{
    const size_t bytes = sizeof(float) * size;

    cudaMalloc((void**)d_data, bytes);
    cudaCheckError();   // stop before using a pointer that was never allocated

    // No cudaMemset here: the copy below overwrites every byte of the buffer.
    cudaMemcpy(*d_data, h_data, bytes, cudaMemcpyHostToDevice);
    cudaCheckError();
}
// Allocates a host array and downloads a device buffer into it.
//
// h_data: out-parameter; receives a new[]-allocated host array of `size` floats.
// d_data: device buffer holding at least `size` floats.
// size:   number of float elements to copy.
//
// Failures are reported through cudaCheckError(), which terminates the program.
void copyFromGPU(float** h_data, float* d_data, int size)
{
    *h_data = new float[size];
    memset(*h_data, 0, sizeof(float) * size);

    // The source is device memory and the destination is host memory, so the
    // transfer kind must be device-to-host.
    cudaMemcpy(*h_data, d_data, sizeof(float) * size, cudaMemcpyDeviceToHost);
    cudaCheckError();
}
// Moving-average kernel: each thread produces one output element.
//
// Launch layout: 1-D grid of 1-D blocks. Threads whose global index is
// >= size do nothing.
//
// dataIn:  device input. Each thread reads dataIn[index - radious] through
//          dataIn[index + radious], so the pointer must have `radious` valid
//          elements before index 0 and after index size-1.
// dataOut: device output. The window sum is added to the value already stored
//          in dataOut[index] before dividing, so the buffer must hold 0 on
//          entry for the result to be a plain average.
// size:    number of output elements to compute.
// radious: half-width of the averaging window (2*radious+1 samples).
__global__ void filtroKernel(float* dataIn, float* dataOut, int size, int radious)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index < size)
    {
        // Accumulate in a register, starting from the existing output value.
        float acc = dataOut[index];
        for (int k = index + radious; k >= index - radious; --k)
        {
            acc += dataIn[k];
        }
        dataOut[index] = acc / (radious * 2 + 1);
    }
}
// Runs the moving-average filter on the GPU and returns the result in a newly
// allocated host array.
//
// dataIn:  host input signal holding `size` floats.
// dataOut: out-parameter; receives the host array allocated by copyFromGPU.
// size:    number of elements in dataIn.
// radious: half-width of the averaging window.
//
// Only the interior [radious, size - radious) is filtered; the border elements
// keep the 0 written by cudaMemset. Failures are reported through
// cudaCheckError().
void filtroGPU(float* dataIn, float** dataOut, int size, int radious)
{
    float* d_dataIn = NULL;
    float* d_dataOut = NULL;

    // Upload the input and create a zero-filled output buffer on the device.
    copyToGPU(dataIn, &d_dataIn, size);
    cudaMalloc((void**)&d_dataOut, size * sizeof(float));
    cudaCheckError();
    cudaMemset(d_dataOut, 0, sizeof(float) * size);
    cudaCheckError();

    // Launch one thread per interior element. When the window does not fit in
    // the signal there is nothing to compute, and a zero-block grid would be an
    // invalid launch configuration, so the launch is skipped.
    const int interior = size - (radious * 2);
    if (interior > 0)
    {
        const int numthreadporbloque = 256;
        // Ceil-division: just enough blocks to cover `interior` threads.
        const int numbloques = (interior + numthreadporbloque - 1) / numthreadporbloque;

        // Both pointers are shifted by `radious` so that thread 0 handles
        // element `radious` and its window stays inside the buffers.
        filtroKernel<<<numbloques, numthreadporbloque>>>(
            d_dataIn + radious, d_dataOut + radious, interior, radious);
        cudaCheckError();   // launch-configuration errors
        cudaDeviceSynchronize();
        cudaCheckError();   // errors raised while the kernel was running
    }

    // Download the result to the host.
    copyFromGPU(dataOut, d_dataOut, size);

    // Release device memory.
    cudaFree(d_dataOut);
    cudaFree(d_dataIn);
}
// Compares two float arrays element by element within a tolerance.
//
// d1, d2: arrays holding at least `size` floats each.
// size:   number of elements to compare.
//
// Returns 1 when every pair matches, 0 at the first mismatch (after printing
// the index and both values to std::cout).
//
// Two values match when they are exactly equal or when
// |d1[i] - d2[i]| <= atol + rtol * |d2[i]|. A tolerance is used because results
// computed on different processors are not guaranteed to be bit-identical.
int corrector(float* d1, float* d2, int size)
{
    const float rtol = 1e-5f;
    const float atol = 1e-6f;

    for (int i = 0; i < size; i++)
    {
        // Exact equality first: this also accepts matching infinities, whose
        // difference would otherwise be NaN.
        if (d1[i] == d2[i])
        {
            continue;
        }

        const float diff = std::fabs(d1[i] - d2[i]);
        const float allowed = atol + rtol * std::fabs(d2[i]);

        // Written as a negated <= so that a NaN difference counts as a mismatch.
        if (!(diff <= allowed))
        {
            std::cout << "Error en elemento " << i << " " << d1[i] << " " << d2[i] << "\n";
            return 0;
        }
    }
    return 1;
}
// Builds a test signal, filters it on the CPU and on the GPU, and reports
// whether the two results agree.
int main(int argc, char** argv)
{
    float* dataIn = NULL;
    float* dataOutCPU = NULL;
    float* dataOutGPU = NULL;

    setupData(&dataIn, N);
    filtroCPU(dataIn, &dataOutCPU, N, RADIOUS);
    filtroGPU(dataIn, &dataOutGPU, N, RADIOUS);

    if (!corrector(dataOutCPU, dataOutGPU, N))
        std::cout << "Error\n";

    // All three buffers are arrays created with new[], so they must be
    // released with delete[]; scalar delete on them is undefined behaviour.
    delete[] dataOutCPU;
    delete[] dataOutGPU;
    delete[] dataIn;
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement