Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
/**
 * Per-block partial sum: every thread block produces one output value.
 *
 * Block b adds up the 2 * blockDim.x consecutive input elements that start at
 * index 2 * b * blockDim.x and stores the total in wyniki[b].  Indices at or
 * beyond rozmiar_danych contribute nothing, so the last block may cover a
 * partial tile.
 *
 * Launch requirements:
 *   - 1-D grid of 1-D blocks; blockDim.x may be any value up to 1024 and does
 *     not have to be a power of two;
 *   - gridDim.x >= ceil(rozmiar_danych / (2 * blockDim.x)) to cover the input;
 *   - wyniki must hold at least gridDim.x floats and must not alias the part
 *     of dane that other blocks are still reading;
 *   - uses 4 KiB of static shared memory, no dynamic shared memory argument.
 *
 * @param dane            input values in device memory (only read)
 * @param wyniki          output array in device memory, one sum per block
 * @param rozmiar_danych  number of valid elements in dane
 */
__global__ void block_sum_rw(float *dane, float *wyniki, const size_t rozmiar_danych)
{
    // Scratch space sized for the largest block CUDA permits (1024 threads),
    // so the kernel works with a plain <<<grid, threads>>> launch.
    __shared__ float czesciowe[1024];

    const unsigned int tid = threadIdx.x;

    // Each block owns a private tile of 2 * blockDim.x elements; the factor 2
    // in the tile origin keeps neighbouring blocks from touching each other's data.
    const size_t pierwszy = (size_t)2 * blockIdx.x * blockDim.x + tid;
    const size_t drugi = pierwszy + blockDim.x;

    // First reduction step is done while loading: every thread fetches two
    // elements, each guarded against the end of the input.
    float suma = 0.0f;
    if (pierwszy < rozmiar_danych)
        suma = dane[pierwszy];
    if (drugi < rozmiar_danych)
        suma += dane[drugi];
    czesciowe[tid] = suma;
    __syncthreads();  // all per-thread sums must be visible before the tree step

    // Start from the smallest power of two >= blockDim.x so that block sizes
    // which are not powers of two are reduced correctly as well.
    unsigned int krok = 1;
    while (krok < blockDim.x)
        krok <<= 1;

    for (krok >>= 1; krok > 0; krok >>= 1)
    {
        if (tid < krok && tid + krok < blockDim.x)
            czesciowe[tid] += czesciowe[tid + krok];
        // Barrier sits outside the branch so every thread of the block reaches it.
        __syncthreads();
    }

    if (tid == 0)
        wyniki[blockIdx.x] = czesciowe[0];
}
/* Reports a failed CUDA runtime call on stderr.  Returns true on cudaSuccess. */
static bool cuda_ok(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Sums `count` floats from the host array `dane` on the GPU and stores the
 * total in *suma.  Returns false (after printing a diagnostic) if any CUDA
 * call fails; all device memory is released on every path.
 *
 * The reduction runs in passes.  Each pass launches block_sum_rw, which is
 * expected to write to wyniki[b] the sum of the 2 * blockDim.x inputs that
 * start at index 2 * b * blockDim.x.  The partial sums of one pass become the
 * input of the next, until a single value is left.  Two partial buffers are
 * alternated so that a pass never reads from the buffer it writes to.
 *
 * gridDim.x is passed as unsigned int, so count / (2 * block_size) must fit
 * the device's grid-size limit.
 */
static bool sum_on_device(const float *dane, size_t count, unsigned int block_size, float *suma)
{
    *suma = 0.0f;
    if (count == 0)
        return true;  // nothing to launch; an empty grid is an invalid configuration

    const size_t per_block = (size_t)2 * block_size;
    const size_t max_partials = (count + per_block - 1) / per_block;

    float *dane_cuda = NULL;
    float *czesciowe_a = NULL;
    float *czesciowe_b = NULL;

    bool ok =
        cuda_ok(cudaMalloc((void **)&dane_cuda, count * sizeof(float)), "cudaMalloc(dane_cuda)") &&
        cuda_ok(cudaMalloc((void **)&czesciowe_a, max_partials * sizeof(float)), "cudaMalloc(czesciowe_a)") &&
        cuda_ok(cudaMalloc((void **)&czesciowe_b, max_partials * sizeof(float)), "cudaMalloc(czesciowe_b)") &&
        cuda_ok(cudaMemcpy(dane_cuda, dane, count * sizeof(float), cudaMemcpyHostToDevice),
                "cudaMemcpy(host to device)");

    float *src = dane_cuda;
    float *dst = czesciowe_a;
    float *spare = czesciowe_b;
    size_t remaining = count;  // number of live values in src

    if (ok)
    {
        do
        {
            // Small passes use a smaller block so that a single block covers
            // exactly the remaining values (two per thread).
            size_t threads = (remaining + 1) / 2;
            if (threads > block_size)
                threads = block_size;
            const size_t grid = (remaining + 2 * threads - 1) / (2 * threads);

            printf("grid: %d, threads: %d\n", (int)grid, (int)threads);
            block_sum_rw<<<(unsigned int)grid, (unsigned int)threads>>>(src, dst, remaining);
            ok = cuda_ok(cudaGetLastError(), "block_sum_rw launch");

            // The output of this pass is the input of the next one; the
            // buffer just consumed becomes free unless it is the original data.
            remaining = grid;
            src = dst;
            dst = spare;
            spare = src;
        } while (ok && remaining > 1);
    }

    // Blocking copy: also waits for the kernels and surfaces execution errors.
    ok = ok && cuda_ok(cudaMemcpy(suma, src, sizeof(float), cudaMemcpyDeviceToHost),
                       "cudaMemcpy(device to host)");

    cudaFree(czesciowe_b);
    cudaFree(czesciowe_a);
    cudaFree(dane_cuda);
    return ok;
}

/*
 * Fills an array of 2^28 floats with ones, sums it on the GPU and checks the
 * result against the known total.  Exit status is 0 only if every CUDA call
 * succeeded and the sum is correct.
 */
int main()
{
    const unsigned int block_size = 512;
    const size_t arraySize = (size_t)1 << 28;

    float *dane = (float *)malloc(arraySize * sizeof(float));
    if (dane == NULL)
    {
        fprintf(stderr, "malloc failed!");
        return 1;
    }
    for (size_t i = 0; i < arraySize; i++)
        dane[i] = 1.0f;

    float suma = 0.0f;
    bool ok = sum_on_device(dane, arraySize, block_size, &suma);
    free(dane);

    if (ok)
    {
        // 2^28 is exactly representable as a float and every partial sum of
        // ones is a small integer, so an exact comparison is valid here.
        const float oczekiwana = (float)arraySize;
        printf("sum: %.1f (expected %.1f)\n", suma, oczekiwana);
        if (suma != oczekiwana)
        {
            fprintf(stderr, "sum mismatch!");
            ok = false;
        }
    }

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaError_t cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }
    return ok ? 0 : 1;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement