Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <stdio.h>
- #include <math.h>
- #include <iostream>
- #include <iomanip>
- #include <ctime>
// Reference value of pi used to grade the computed approximations.
// Guarded so it does not clash with a definition that <math.h> may already
// provide on some toolchains.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// Launch geometry shared by the host code and the reduction kernel:
//   blockSize - threads per block; also the length of the kernel's shared array.
//   gridSize  - number of blocks in the first reduction pass.
//   size      - total number of series terms (exactly one per launched thread).
static const int blockSize = 1024;
static const int gridSize = 500;
static const int size = gridSize * blockSize;

// Reduces `table` (size elements, host memory) on the GPU, prints the resulting
// pi approximation and the speed-up relative to `cpu_time` (seconds).
void sumWithCuda(double* table, double cpu_time);
- __global__ void sum(double* in, double* out) {
- __shared__ double sh_sum[blockSize];
- unsigned int tid = threadIdx.x;
- unsigned int i = blockIdx.x * blockSize + tid;
- sh_sum[tid] = in[i];
- __syncthreads();
- for (int s = blockSize / 2; s > 0; s >>= 1) {
- if (tid < s) {
- sh_sum[tid] += sh_sum[tid + s];
- }
- __syncthreads();
- }
- if (tid == 0) out[blockIdx.x] = sh_sum[0];
- }
- int main()
- {
- double org_pi = M_PI;
- double sum = 0, pi;
- double* a = new double[size];
- a[0] = 0;
- for (int i = 1; i < size; i++) {
- a[i] = 1 / (pow(i, 2));
- }
- clock_t begin = clock();
- //CPU
- for (int i = 1; i < size; i++) {
- sum += a[i];
- }
- clock_t end = clock();
- double elapsed_time = double(end - begin) / CLOCKS_PER_SEC;
- pi = sqrt(6 * sum);
- double accuracy = 100 * pi / org_pi;
- std::cout << std::setprecision(20) << "CPU: " << pi << "\tczas w sekundach: " << elapsed_time << "\t\tdokladnosc: " << accuracy << "%\n";
- //GPU
- sumWithCuda(a, elapsed_time);
- return 0;
- }
- // Helper function for using CUDA to add vectors in parallel.
- void sumWithCuda(double* table, double cpu_time)
- {
- double org_pi = M_PI;
- double* dev_table, * dev_out;
- double* out = new double[size];
- cudaMalloc((void**)&dev_table, size * sizeof(double));
- cudaMemcpy(dev_table, table, size * sizeof(double), cudaMemcpyHostToDevice);
- cudaMalloc((void**)&dev_out, size * sizeof(double));
- clock_t begin = clock();
- sum << <gridSize, blockSize >> > (dev_table, dev_out);
- sum << <1, blockSize >> > (dev_out, dev_out);
- clock_t end = clock();
- double elapsed_time = double(end - begin) / CLOCKS_PER_SEC;
- cudaMemcpy(out, dev_out, size * sizeof(double), cudaMemcpyDeviceToHost);
- cudaFree(dev_table);
- cudaFree(dev_out);
- double pi_gpu = sqrt(6 * out[0]);
- double accuracy = 100 * pi_gpu / org_pi;
- std::cout << std::setprecision(20) << "GPU: " << pi_gpu << "\tczas w sekundach: " << elapsed_time << "\t\tdokladnosc: " << accuracy << "%\n";
- double speed_up = cpu_time / elapsed_time;
- std::cout << std::setprecision(5) << "SpeedUp: " << speed_up << "\n";
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement