Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "cuda_runtime.h"
- #include "device_launch_parameters.h"
- #include <stdio.h>
- #include <iostream>
- #include <cmath>
- #include <iomanip>
- #include <fstream>
- #include <omp.h>
- #define PI 3.14159265
- #define STEP 0.1
- #define X1 2
- #define X2 3
- #define EPS 0.001
- using namespace std;
- __global__ void kernelCalcNextPoint(double *next_point, const double *x1, const double *x2, const double *step);
- __global__ void kernelCalcGradPoints(double *grads, double *points);
- __device__ double dev_calc_gradX1(double x1, double x2);
- __device__ double dev_calc_gradX2(double x1, double x2);
- __device__ double dev_nextX1(double curr_x1, double curr_x2, double step);
- __device__ double dev_nextX2(double curr_x1, double curr_x2, double step);
- __global__ void kernelCalcNextPoint(double *next_point, const double * x1, const double* x2, const double* step)
- {
- if (threadIdx.x == 0) {
- next_point[0] = dev_nextX1(x1[0], x2[0], step[0]);
- }
- else {
- next_point[1] = dev_nextX2(x1[0], x2[0], step[0]);
- }
- }
- __global__ void kernelCalcGradPoints(double *grads, double *points)
- {
- int i = threadIdx.x;
- if ((i + 1) / 2 == 0) {
- grads[i] = dev_calc_gradX1(points[i], points[i + 1]);
- }
- else {
- grads[i] = dev_calc_gradX2(points[i - 1], points[i]);
- }
- }
- __device__ double dev_calc_gradX1(double x1, double x2)
- {
- return 6 * x1 + x2 - 1;
- }
- __device__ double dev_calc_gradX2(double x1, double x2)
- {
- return x1 + 4 * x2 - 4;
- }
- __device__ double dev_nextX1(double curr_x1, double curr_x2, double step)
- {
- return curr_x1 - step * (6 * curr_x1 + curr_x2 - 1);
- }
- __device__ double dev_nextX2(double curr_x1, double curr_x2, double step)
- {
- return curr_x2 - step * (curr_x1 + 4 * curr_x2 - 4);
- }
- cudaError_t calcNextPoint(double &next_x1, double &next_x2, const double x1, const double x2, const double step)
- {
- double *dev_x1=0;
- double *dev_x2=0;
- double *dev_next_point=0;
- double *dev_step=0;
- double arr_x1[1] = { x1 };
- double arr_x2[1] = { x2 };
- double arr_step[1] = { step };
- cudaError_t cudaStatus;
- // Проверка на доступность видеокарты для вычислений
- cudaStatus = cudaSetDevice(0);
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
- goto Error;
- }
- // Выделение памяти на видеокарте
- cudaMalloc((void**)&dev_x1, sizeof(double));
- cudaMalloc((void**)&dev_x2, sizeof(double));
- cudaMalloc((void**)&dev_next_point, 2 * sizeof(double));
- cudaMalloc((void**)&dev_step, sizeof(double));
- // Отправка на видеокарту значений
- cudaMemcpy(dev_x1, &arr_x1, sizeof(double), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_x2, &arr_x2, sizeof(double), cudaMemcpyHostToDevice);
- cudaMemcpy(dev_step, &arr_step, sizeof(double), cudaMemcpyHostToDevice);
- kernelCalcNextPoint << <1, 2 >> >(dev_next_point, dev_x1, dev_x2, dev_step);
- // Получить возникшие ошибки в процессе выполнения
- cudaStatus = cudaGetLastError();
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
- goto Error;
- }
- cudaDeviceSynchronize();
- double res_next_point[2] = { 0, 0 };
- // Получение результата выполнения
- cudaStatus = cudaMemcpy(res_next_point, dev_next_point, 2 * sizeof(double), cudaMemcpyDeviceToHost);
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "cudaMemcpy failed!");
- goto Error;
- }
- next_x1 = res_next_point[0];
- next_x2 = res_next_point[1];
- Error:
- cudaFree(dev_x1);
- cudaFree(dev_x2);
- cudaFree(dev_next_point);
- cudaFree(dev_step);
- return cudaStatus;
- }
- cudaError_t calcGradPoints(double &grad_x1, double &grad_x2, double &grad_next_x1, double &grad_next_x2,
- const double x1, const double x2, const double next_x1, const double next_x2)
- {
- double *dev_grads=0;
- double *dev_points=0;
- double points[4] = { x1, x2, next_x1, next_x2 };
- cudaError_t cudaStatus;
- // Проверка на доступность видеокарты для вычислений
- cudaStatus = cudaSetDevice(0);
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
- goto Error;
- }
- // Выделение памяти на видеокарте
- cudaMalloc((void**)&dev_grads, 4 * sizeof(double));
- cudaMalloc((void**)&dev_points, 4 * sizeof(double));
- // Отправка на видеокарту значений
- cudaMemcpy(dev_points, points, 4 * sizeof(double), cudaMemcpyHostToDevice);
- kernelCalcGradPoints << <1, 4 >> >(dev_grads, dev_points);
- // Получить возникшие ошибки в процессе выполнения
- cudaStatus = cudaGetLastError();
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
- goto Error;
- }
- cudaDeviceSynchronize();
- double res_grads[4] = { 0, 0, 0, 0 };
- // Получение результата выполнения
- cudaStatus = cudaMemcpy(res_grads, dev_grads, 4 * sizeof(double), cudaMemcpyDeviceToHost);
- if (cudaStatus != cudaSuccess) {
- fprintf(stderr, "cudaMemcpy failed!");
- goto Error;
- }
- grad_x1 = res_grads[0];
- grad_x2 = res_grads[1];
- grad_next_x1 = res_grads[2];
- grad_next_x2 = res_grads[3];
- Error:
- cudaFree(dev_grads);
- cudaFree(dev_points);
- return cudaStatus;
- }
- // общие функции для всех способов
- double func(double x1, double x2)
- {
- return 3 * pow(x1, 2) + x1 * x2 + 2 * pow(x2, 2) - x1 - 4 * x2;
- }
- double calc_gradX1(double x1, double x2)
- {
- return 6 * x1 + x2 - 1;
- }
- double calc_gradX2(double x1, double x2)
- {
- return x1 + 4 * x2 - 4;
- }
- double nextX1(double curr_x1, double curr_x2, double step)
- {
- return curr_x1 - step * calc_gradX1(curr_x1, curr_x2);
- }
- double nextX2(double curr_x1, double curr_x2, double step)
- {
- return curr_x2 - step * calc_gradX2(curr_x1, curr_x2);
- }
- double minFounded(double x1, double x2, double next_x1, double next_x2, double eps)
- {
- double res = sqrt(pow(next_x1 - x1, 2) + pow(next_x2 - x2, 2));
- return res < eps;
- }
- double findFirstStep(double x1, double x2, double& next_x1, double& next_x2, double step)
- {
- double step_delta = STEP / 2;
- if (func(x1, x2) < func(next_x1, next_x2))
- {
- step -= step_delta;
- next_x1 = nextX1(x1, x2, step);
- next_x2 = nextX2(x1, x2, step);
- if (func(x1, x2) < func(next_x1, next_x2))
- {
- step += 2 * step_delta;
- next_x1 = nextX1(x1, x2, step);
- next_x2 = nextX2(x1, x2, step);
- if (func(x1, x2) < func(next_x1, next_x2))
- return 0;
- else
- return step;
- }
- else
- return step;
- }
- else
- return step;
- }
- double findNextStep(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
- {
- double chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
- double znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
- double cosBetweenVectors = chislitel / znamenatel;
- double corner = acos(cosBetweenVectors) * 180.0 / PI;
- double cornerMin = 30;
- double cornerMax = 90;
- if (corner < cornerMin)
- return 2 * curr_step;
- else if (corner >= cornerMin && corner <= cornerMax)
- return curr_step;
- else
- return curr_step / 3;
- }
- void cudaOnlyCalcMethod() {
- double x1 = X1;
- double x2 = X2;
- double eps = EPS;
- double step = STEP;
- int maxCountLoop = 100;
- double next_x1 = nextX1(x1, x2, step);
- double next_x2 = nextX2(x1, x2, step);
- step = findFirstStep(x1, x2, next_x1, next_x2, step);
- if (step == 0) {
- step = STEP;
- }
- cout << "Value of current step: " << step << "\n";
- cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";
- double gradX1, gradX2, gradNextX1, gradNextX2;
- for (int k = 1; k < maxCountLoop; k++)
- {
- calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);
- step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
- cout << "Value of current step: " << step << "\n";
- x1 = next_x1;
- x2 = next_x2;
- calcNextPoint(next_x1, next_x2, x1, x2, step);
- // условие нахождения точки минимума
- if (minFounded(x1, x2, next_x1, next_x2, eps))
- {
- cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
- break;
- }
- cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
- };
- }
- // Вычисление параллельными секциями
- double findFirstStep_sections(double x1, double x2, double& next_x1, double& next_x2, double step)
- {
- double step_delta = STEP / 2;
- if (func(x1, x2) < func(next_x1, next_x2))
- {
- double func_res1, func_res2, curr_step1, curr_step2,
- step1_next_x1, step1_next_x2, step2_next_x1, step2_next_x2;
- #pragma omp parallel sections
- {
- #pragma omp section
- {
- curr_step1 = step - step_delta;
- step1_next_x1 = nextX1(x1, x2, curr_step1);
- step1_next_x2 = nextX2(x1, x2, curr_step1);
- func_res1 = func(step1_next_x1, step1_next_x2);
- }
- #pragma omp section
- {
- curr_step2 = step + step_delta;
- step2_next_x1 = nextX1(x1, x2, curr_step2);
- step2_next_x2 = nextX2(x1, x2, curr_step2);
- func_res2 = func(step2_next_x1, step2_next_x2);
- }
- }
- double func_old_res = func(x1, x2);
- if (func_res1 < func_old_res && func_res1 < func_res2) {
- next_x1 = step1_next_x1;
- next_x2 = step1_next_x2;
- return curr_step1;
- }
- else if (func_res2 < func_old_res && func_res2 < func_res1) {
- next_x1 = step2_next_x1;
- next_x2 = step2_next_x2;
- return curr_step2;
- }
- else
- return step;
- }
- else
- return step;
- }
- double findNextStep_sections(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
- {
- double chislitel, znamenatel;
- #pragma omp parallel sections
- {
- #pragma omp section
- {
- chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
- }
- #pragma omp section
- {
- znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
- }
- }
- #pragma omp barrier
- double cosBetweenVectors = chislitel / znamenatel;
- double corner = acos(cosBetweenVectors) * 180.0 / PI;
- double cornerMin = 30;
- double cornerMax = 90;
- if (corner < cornerMin)
- return 2 * curr_step;
- else if (corner >= cornerMin && corner <= cornerMax)
- return curr_step;
- else
- return curr_step / 3;
- }
- void cuda_openmp_CalcMethod() {
- double x1 = X1;
- double x2 = X2;
- double eps = EPS;
- double step = STEP;
- int maxCountLoop = 100;
- double next_x1, next_x2;
- #pragma omp parallel sections
- {
- #pragma omp section
- {
- next_x1 = nextX1(x1, x2, step);
- }
- #pragma omp section
- {
- next_x2 = nextX2(x1, x2, step);
- }
- }
- #pragma omp barrier
- step = findFirstStep_sections(x1, x2, next_x1, next_x2, step);
- if (step == 0) {
- step = STEP;
- }
- cout << "Value of current step: " << step << "\n";
- cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";
- double gradX1, gradX2, gradNextX1, gradNextX2;
- for (int k = 1; k < maxCountLoop; k++)
- {
- calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);
- step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
- cout << "Value of current step: " << step << "\n";
- x1 = next_x1;
- x2 = next_x2;
- calcNextPoint(next_x1, next_x2, x1, x2, step);
- // условие нахождения точки минимума
- if (minFounded(x1, x2, next_x1, next_x2, eps))
- {
- cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
- break;
- }
- cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
- };
- }
- int main()
- {
- cudaEvent_t start_cuda, stop_cuda;
- float gpuTime;
- cout << "CUDA" << "\n";
- cudaEventCreate(&start_cuda);
- cudaEventCreate(&stop_cuda);
- cudaEventRecord(start_cuda, 0);
- cudaOnlyCalcMethod();
- cudaEventRecord(stop_cuda, 0);
- cudaEventSynchronize(stop_cuda);
- cudaEventElapsedTime(&gpuTime, start_cuda, stop_cuda);
- printf("on Host: \t%.4f \n", gpuTime);
- //cout << "Time: " << gpuTime << "\n\n";
- cudaEvent_t start_cuda_openmp, stop_cuda_openmp;
- //gpuTime = 0;
- cout << "CUDA + OpenMP" << "\n";
- cudaEventCreate(&start_cuda_openmp);
- cudaEventCreate(&stop_cuda_openmp);
- cudaEventRecord(start_cuda_openmp, 0);
- cuda_openmp_CalcMethod();
- cudaEventRecord(stop_cuda_openmp, 0);
- cudaEventSynchronize(stop_cuda_openmp);
- cudaEventElapsedTime(&gpuTime, start_cuda_openmp, stop_cuda_openmp);
- cout << "Time: " << gpuTime << "\n\n";
- //Ожидаем ввод, чтобы не скинулась консоль, рандомная переменная
- cin >> gpuTime;
- return 0;
- };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement