daily pastebin goal
42%
SHARE
TWEET

Untitled

a guest Aug 21st, 2018 61 Never
Upgrade to PRO!
ENDING IN00days00hours00mins00secs
  1. #include "cuda_runtime.h"
  2. #include "device_launch_parameters.h"
  3.  
  4. #include <stdio.h>
  5. #include <ctime>
  6. #include <iostream>
  7. #include <cmath>
  8. #include <algorithm>
  9. #include <cooperative_groups.h>
  10. #include <assert.h>
  11. #include <memory>
  12. #pragma once
  13. #ifdef __INTELLISENSE__
  14. void __syncthreads();
  15. #endif
  16.  
  17. namespace cg = cooperative_groups;
  18.  
  19. cudaError_t addWithCuda(int *c, const int *a, unsigned int size);
  20.  
  21.  
  22. __global__ void reduction(int *g_idata, int *g_odata) {
  23.     // Shared Data (sdata) is allocated in kernel call as 3rd arg
  24.     extern __shared__ float sdata[];
  25.     int myId = threadIdx.x + blockDim.x * blockIdx.x;
  26.     int tid = threadIdx.x;
  27.     // load  all the data from global memory to shared memory
  28.     sdata[tid] = g_idata[myId];
  29.     __syncthreads();  //Make sure the entire block is loaded
  30.     for (unsigned int i = blockDim.x / 2; i > 0; i >>= 1) {
  31.         if (tid < i) {
  32.             g_idata[myId] += g_idata[myId + i];
  33.             //sdata[tid] += sdata[tid + i];
  34.  
  35.         }
  36.         __syncthreads();
  37.     }
  38.     if (tid == 0) {
  39.         //g_odata[blockIdx.x] = sdata[0];
  40.         g_odata[blockIdx.x] = g_idata[myId];
  41.     }
  42. }
  43. void remplir(int *arr, int length, int arrayval)
  44. {
  45.     for (int i = 0; i < length; ++i) {
  46.         arr[i] = arrayval;
  47.     }
  48. }
  49. void afficherTab(int *arr, int length)
  50. {
  51.     for (int i = 0; i < length; ++i) {
  52.         printf("%d ", arr[i]);
  53.         printf("\n");
  54.     }
  55.  
  56. }
  57. int main()
  58. {
  59.     const int arraySize = 128 * 1024;
  60.     static int a[arraySize];
  61.     //std::unique_ptr<int[]> a = std::unique_ptr<int[]>(new int[arraySize]);
  62.     //int a[arraySize];
  63.     static int c[arraySize] = { 0 };
  64.     remplir(a, arraySize, 1);
  65.  
  66.     // Add vectors in parallel.
  67.     cudaError_t cudaStatus = addWithCuda(c, a, arraySize);
  68.     if (cudaStatus != cudaSuccess) {
  69.         fprintf(stderr, "addWithCuda failed!");
  70.         return 1;
  71.     }
  72.  
  73.     //afficherTab(c, arraySize / 512);
  74.     afficherTab(c, arraySize / 512);
  75.     system("PAUSE");
  76.     // cudaDeviceReset must be called before exiting in order for profiling and
  77.     // tracing tools such as Nsight and Visual Profiler to show complete traces.
  78.  
  79.  
  80.     cudaStatus = cudaDeviceReset();
  81.     if (cudaStatus != cudaSuccess) {
  82.         fprintf(stderr, "cudaDeviceReset failed!");
  83.         return 1;
  84.     }
  85.  
  86.  
  87.     return 0;
  88. }
  89.  
  90. // Helper function for using CUDA to add vectors in parallel.
  91. cudaError_t addWithCuda(int *c, const int *a, unsigned int size)
  92. {
  93.     int *dev_a = 0;
  94.     int *dev_c = 0;
  95.     cudaError_t cudaStatus;
  96.     //declaration pour la mesure du temp
  97.     cudaEvent_t start, stop;
  98.     cudaEventCreate(&start);
  99.     cudaEventCreate(&stop);
  100.  
  101.     // Choose which GPU to run on, change this on a multi-GPU system.
  102.     cudaStatus = cudaSetDevice(0);
  103.     if (cudaStatus != cudaSuccess) {
  104.         fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
  105.         goto Error;
  106.     }
  107.  
  108.     // Allocate GPU buffers for three vectors (two input, one output)    .
  109.     cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
  110.     if (cudaStatus != cudaSuccess) {
  111.         fprintf(stderr, "cudaMalloc failed!");
  112.         goto Error;
  113.     }
  114.  
  115.     cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
  116.     if (cudaStatus != cudaSuccess) {
  117.         fprintf(stderr, "cudaMalloc failed!");
  118.         goto Error;
  119.     }
  120.  
  121.  
  122.     // Copy input vectors from host memory to GPU buffers.
  123.     cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
  124.     if (cudaStatus != cudaSuccess) {
  125.         fprintf(stderr, "cudaMemcpy failed!");
  126.         goto Error;
  127.     }
  128.  
  129.  
  130.     // Launch a kernel on the GPU with one thread for each element.
  131.     //addKernel<<<1, size>>>(dev_c, dev_a, dev_b);
  132.     const int threads = 1024;
  133.     const int nbreblock = size / threads;
  134.     cudaEventRecord(start); // demarre le compteur
  135.     reduction << <nbreblock, threads, threads * sizeof(int) >> >(dev_a, dev_c);
  136.     cudaEventRecord(stop); // arret du compteur
  137.  
  138.                            // Check for any errors launching the kernel
  139.     cudaStatus = cudaGetLastError();
  140.     if (cudaStatus != cudaSuccess) {
  141.         fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
  142.         goto Error;
  143.     }
  144.  
  145.     // cudaDeviceSynchronize waits for the kernel to finish, and returns
  146.     // any errors encountered during the launch.
  147.     cudaStatus = cudaDeviceSynchronize();
  148.     if (cudaStatus != cudaSuccess) {
  149.         fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
  150.         goto Error;
  151.     }
  152.  
  153.     // Copy output vector from GPU buffer to host memory.
  154.     cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
  155.     if (cudaStatus != cudaSuccess) {
  156.         fprintf(stderr, "cudaMemcpy failed!");
  157.         goto Error;
  158.     }
  159.  
  160.     cudaEventSynchronize(stop); // sync time
  161.     float milliseconds = 0;
  162.     cudaEventElapsedTime(&milliseconds, start, stop);
  163.     std::cout << " temp = " << milliseconds << std::endl;
  164.  
  165.  
  166. Error:
  167.     cudaFree(dev_c);
  168.     cudaFree(dev_a);
  169.  
  170.     return cudaStatus;
  171. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top