Advertisement
lightxx

Global Memory Atomics

Aug 19th, 2014
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C# 4.41 KB | None | 0 0
  /// <summary>
  /// Sample program that builds a 256-bin histogram of a random byte buffer on the GPU
  /// using atomic adds on the device histogram array, then re-counts the same buffer on
  /// the CPU and reports any bin where the two results differ.
  /// </summary>
  public class Program {
      /// <summary>
      /// The size of the image data, in bytes (100 * 1024 * 1024).
      /// </summary>
      public const int Size = 100 * 1024 * 1024;

      /// <summary>
      /// Number of histogram bins: one for every possible <see cref="byte"/> value.
      /// </summary>
      private const int BinCount = 256;

      /// <summary>
      /// Number of threads per block used for the kernel launch.
      /// </summary>
      private const int ThreadsPerBlock = 256;

      /// <summary>
      /// Processor count assumed when the device reports zero multiprocessors
      /// (for example when running on the emulator).
      /// </summary>
      private const int FallbackProcessorCount = 16;

      /// <summary>
      /// GPU kernel: every thread walks the buffer starting at its global thread index and
      /// advancing by the total number of launched threads, atomically incrementing the
      /// histogram bin selected by each byte it visits.
      /// </summary>
      /// <param name="thread">
      /// The thread context supplying thread/block indices and the grid dimensions.
      /// </param>
      /// <param name="buffer">
      /// The buffer containing the image data.
      /// </param>
      /// <param name="size">
      /// The number of bytes of <paramref name="buffer"/> to process.
      /// </param>
      /// <param name="histo">
      /// The histogram to accumulate into; indexed by byte value, so it must hold at
      /// least 256 entries. The kernel only adds to it, so the caller must zero it first.
      /// </param>
      [Cudafy]
      public static void CalculateHistogramAsKernel(GThread thread, byte[] buffer, int size, uint[] histo) {
          // Global index of this thread, and the distance to the next element it owns
          // (the total number of threads in the launch).
          var i = thread.threadIdx.x + (thread.blockIdx.x * thread.blockDim.x);
          var stride = thread.blockDim.x * thread.gridDim.x;

          while (i < size) {
              // Several threads can hit the same bin concurrently, so the increment
              // has to be atomic.
              thread.atomicAdd(ref histo[buffer[i]], 1);
              i += stride;
          }
      }

      /// <summary>
      /// Runs the GPU histogram over <see cref="Size"/> random bytes, prints the elapsed
      /// time and the histogram sum, and verifies the result against the CPU.
      /// </summary>
      /// <returns>
      /// -1 if the selected CUDA device reports a compute capability below 1.2;
      /// otherwise 0 (mismatches found during verification are reported on the
      /// console but do not change the return value).
      /// </returns>
      public static int Main() {
          // Set the Target to Cuda
          CudafyModes.Target = eGPUType.Cuda;

          var cudafyModule = CudafyTranslator.Cudafy();

          var gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

          // Query the device properties once, with the library-missing fallback, and
          // reuse them for both the capability check and the launch configuration.
          // Previously the capability check used an unguarded query and could throw
          // DllNotFoundException before the fallback was ever reached.
          // NOTE(review): assumes the properties returned by GetDeviceProperties(false)
          // still carry a valid Capability - confirm against the CUDAfy version in use.
          var prop = QueryDeviceProperties(gpu);

          if (gpu is CudaGPU && prop.Capability < new Version(1, 2)) {
              Console.WriteLine("Compute capability 1.2 or higher required for atomics.");
              return -1;
          }

          gpu.LoadModule(cudafyModule);

          // define a buffer to hold the random data and fill it from the Xorshift RNG
          var buffer = new byte[Size];
          var rndXorshift = new RndXorshift();
          rndXorshift.NextBytes(buffer);

          var histo = new uint[BinCount];

          try {
              // The timer starts before the host-to-device copy so the measurement
              // covers every GPU operation, not just the kernel. (The original author
              // measured roughly 74 ms in total versus 15 ms for the kernel alone.)
              gpu.StartTimer();

              // copy the data to the GPU and create a zeroed histogram there
              byte[] devBuffer = gpu.CopyToDevice(buffer);
              uint[] devHisto = gpu.Allocate<uint>(BinCount);
              gpu.Set(devHisto);

              // kernel launch - 2x the number of processor count seems reasonable
              var processorCount = prop.MultiProcessorCount;

              // in case we're running on the emulator
              if (processorCount == 0) {
                  processorCount = FallbackProcessorCount;
              }

              Console.WriteLine("Processors: {0}", processorCount);

              // dynamically invoke the CalculateHistogramAsKernel method
              gpu.Launch(processorCount * 2, ThreadsPerBlock).CalculateHistogramAsKernel(devBuffer, Size, devHisto);

              gpu.CopyFromDevice(devHisto, histo);

              // get stop time, and display the timing results
              var elapsedTime = gpu.StopTimer();
              Console.WriteLine("Time to generate: {0} ms", elapsedTime);
          } finally {
              // Release device memory even when a copy or the launch throws.
              gpu.FreeAll();
          }

          long histoCount = 0;
          for (int i = 0; i < BinCount; i++) {
              histoCount += histo[i];
          }

          Console.WriteLine("Histogram Sum:  {0}", histoCount);

          // verify that we have the same counts via CPU
          ReportMismatches(buffer, histo);

          Console.ReadLine();
          return 0;
      }

      /// <summary>
      /// Gets the device properties, retrying with the argument <c>false</c> when the
      /// default query fails because a native library cannot be found.
      /// </summary>
      /// <param name="gpu">
      /// The device to query.
      /// </param>
      /// <returns>
      /// The properties reported by the device.
      /// </returns>
      private static GPGPUProperties QueryDeviceProperties(GPGPU gpu) {
          try {
              return gpu.GetDeviceProperties();
          } catch (DllNotFoundException) {
              // thrown if the CUDA library cannot be found
              return gpu.GetDeviceProperties(false);
          }
      }

      /// <summary>
      /// Counts the bytes of <paramref name="buffer"/> on the CPU and writes one
      /// "Failure at N!" line for every bin whose count differs from
      /// <paramref name="histo"/>.
      /// </summary>
      /// <param name="buffer">
      /// The data the histogram was computed from.
      /// </param>
      /// <param name="histo">
      /// The histogram produced by the GPU; it is not modified.
      /// </param>
      private static void ReportMismatches(byte[] buffer, uint[] histo) {
          // Compare against an independent CPU histogram instead of decrementing the
          // GPU result in place: decrementing relies on unsigned wraparound and would
          // throw in a checked build whenever the GPU under-counted a bin.
          var expected = new uint[BinCount];
          for (int i = 0; i < buffer.Length; i++) {
              expected[buffer[i]]++;
          }

          for (int i = 0; i < BinCount; i++) {
              if (histo[i] != expected[i]) {
                  Console.WriteLine("Failure at {0}!", i);
              }
          }
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement