Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/// <summary>
/// Sample program that builds a 256-bin histogram of random bytes on the GPU
/// (via CUDAfy) and then checks the GPU result against a CPU pass over the same data.
/// </summary>
public class Program {
    /// <summary>
    /// The size of the image data, in bytes.
    /// </summary>
    public const int Size = 100 * 1024 * 1024;

    /// <summary>
    /// The number of histogram bins: one per possible byte value.
    /// </summary>
    private const int BinCount = 256;

    /// <summary>
    /// The number of threads per block used for the kernel launch.
    /// </summary>
    private const int ThreadsPerBlock = 256;

    /// <summary>
    /// Kernel that accumulates a histogram of the byte values in <paramref name="buffer"/>.
    /// Each thread starts at its global index and advances by the total number of
    /// launched threads until it runs past <paramref name="size"/>.
    /// </summary>
    /// <param name="thread">
    /// The thread context supplying the thread/block indices and dimensions.
    /// </param>
    /// <param name="buffer">
    /// The buffer containing the image data.
    /// </param>
    /// <param name="size">
    /// The number of elements of <paramref name="buffer"/> to process.
    /// </param>
    /// <param name="histo">
    /// The histogram data; indexed by byte value, so it needs one bin per possible byte.
    /// </param>
    [Cudafy]
    public static void CalculateHistogramAsKernel(GThread thread, byte[] buffer, int size, uint[] histo) {
        // calculate the starting index and the offset to the next
        // element that each thread will be processing
        var i = thread.threadIdx.x + (thread.blockIdx.x * thread.blockDim.x);
        var stride = thread.blockDim.x * thread.gridDim.x;
        while (i < size) {
            // many threads may hit the same bin at once, hence the atomic add
            thread.atomicAdd(ref histo[buffer[i]], 1);
            i += stride;
        }
    }

    /// <summary>
    /// The main method: generates random data, computes its histogram on the GPU,
    /// prints the timing and the histogram sum, and verifies the bins on the CPU.
    /// </summary>
    /// <returns>
    /// <c>-1</c> when the device lacks the compute capability required for atomics;
    /// otherwise <c>0</c>.
    /// </returns>
    public static int Main() {
        // Set the Target to Cuda
        CudafyModes.Target = eGPUType.Cuda;
        var cudafyModule = CudafyTranslator.Cudafy();
        var gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

        // Query the device properties once, up front, so that the capability check
        // below is covered by the same fallback as the later processor-count lookup.
        GPGPUProperties prop;
        try {
            prop = gpu.GetDeviceProperties();
        } catch (DllNotFoundException) {
            // thrown if the CUDA library cannot be found
            prop = gpu.GetDeviceProperties(false);
        }

        if (gpu is CudaGPU && prop.Capability < new Version(1, 2)) {
            Console.WriteLine("Compute capability 1.2 or higher required for atomics.");
            return -1;
        }

        gpu.LoadModule(cudafyModule);

        // define a buffer to hold the random data
        var buffer = new byte[Size];

        // fill the buffer with random bytes from the Xorshift RNG
        var rndXorshift = new RndXorshift();
        rndXorshift.NextBytes(buffer);

        try {
            // capture the start time
            // starting the timer here so that we include the cost of
            // all of the operations on the GPU. if the data were
            // already on the GPU and we just timed the kernel
            // the timing would drop from 74 ms to 15 ms. Very fast.
            gpu.StartTimer();

            // allocate memory on the GPU for the file's data
            byte[] devBuffer = gpu.CopyToDevice(buffer);
            uint[] devHisto = gpu.Allocate<uint>(BinCount);
            gpu.Set(devHisto);

            // kernel launch - 2x the number of processor count seems reasonable
            var processorCount = prop.MultiProcessorCount;

            // in case we're running on the emulator
            if (processorCount == 0) {
                processorCount = 16;
            }

            Console.WriteLine("Processors: {0}", processorCount);

            // dynamically invoke the CalculateHistogramAsKernel method
            gpu.Launch(processorCount * 2, ThreadsPerBlock).CalculateHistogramAsKernel(devBuffer, Size, devHisto);

            var histo = new uint[BinCount];
            gpu.CopyFromDevice(devHisto, histo);

            // get stop time, and display the timing results
            var elapsedTime = gpu.StopTimer();
            Console.WriteLine("Time to generate: {0} ms", elapsedTime);

            long histoCount = 0;
            for (int i = 0; i < BinCount; i++) {
                histoCount += histo[i];
            }

            Console.WriteLine("Histogram Sum: {0}", histoCount);

            // verify that we have the same counts via CPU: subtracting one per
            // input byte must bring every bin back to exactly zero
            for (int i = 0; i < Size; i++) {
                histo[buffer[i]]--;
            }

            for (int i = 0; i < BinCount; i++) {
                if (histo[i] != 0) {
                    Console.WriteLine("Failure at {0}!", i);
                }
            }
        } finally {
            // release the device allocations even if a GPU call above throws
            gpu.FreeAll();
        }

        Console.ReadLine();
        return 0;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement