Untitled


#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#include <iostream>
#include <cmath>
#include <iomanip>
#include <fstream>
#include <omp.h>

#define PI 3.14159265
#define STEP 0.1
#define X1 2
#define X2 3
#define EPS 0.001

using namespace std;

__global__ void kernelCalcNextPoint(double *next_point, const double *x1, const double *x2, const double *step);
__global__ void kernelCalcGradPoints(double *grads, double *points);
__device__ double dev_calc_gradX1(double x1, double x2);
__device__ double dev_calc_gradX2(double x1, double x2);
__device__ double dev_nextX1(double curr_x1, double curr_x2, double step);
__device__ double dev_nextX2(double curr_x1, double curr_x2, double step);


__global__ void kernelCalcNextPoint(double *next_point, const double * x1, const double* x2, const double* step)
{
	if (threadIdx.x == 0) {
		next_point[0] = dev_nextX1(x1[0], x2[0], step[0]);
	}
	else {
		next_point[1] = dev_nextX2(x1[0], x2[0], step[0]);
	}

}

__global__ void kernelCalcGradPoints(double *grads, double *points)
{
	int i = threadIdx.x;
	if ((i + 1) / 2 == 0) {
		grads[i] = dev_calc_gradX1(points[i], points[i + 1]);
	}
	else {
		grads[i] = dev_calc_gradX2(points[i - 1], points[i]);
	}
}

__device__ double dev_calc_gradX1(double x1, double x2)
{
	return 6 * x1 + x2 - 1;
}

__device__ double dev_calc_gradX2(double x1, double x2)
{
	return x1 + 4 * x2 - 4;
}

__device__ double dev_nextX1(double curr_x1, double curr_x2, double step)
{
	return curr_x1 - step * (6 * curr_x1 + curr_x2 - 1);
}

__device__ double dev_nextX2(double curr_x1, double curr_x2, double step)
{
	return curr_x2 - step * (curr_x1 + 4 * curr_x2 - 4);
}

cudaError_t calcNextPoint(double &next_x1, double &next_x2, const double x1, const double x2, const double step)
{
	double *dev_x1=0;
	double *dev_x2=0;
	double *dev_next_point=0;
	double *dev_step=0;

	double arr_x1[1] = { x1 };
	double arr_x2[1] = { x2 };
	double arr_step[1] = { step };

	cudaError_t cudaStatus;

	// Проверка на доступность видеокарты для вычислений
	cudaStatus = cudaSetDevice(0);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
		goto Error;
	}

	// Выделение памяти на видеокарте
	cudaMalloc((void**)&dev_x1, sizeof(double));
	cudaMalloc((void**)&dev_x2, sizeof(double));
	cudaMalloc((void**)&dev_next_point, 2 * sizeof(double));
	cudaMalloc((void**)&dev_step, sizeof(double));

	// Отправка на видеокарту значений
	cudaMemcpy(dev_x1, &arr_x1, sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x2, &arr_x2, sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_step, &arr_step, sizeof(double), cudaMemcpyHostToDevice);

	kernelCalcNextPoint << <1, 2 >> >(dev_next_point, dev_x1, dev_x2, dev_step);

	// Получить возникшие ошибки в процессе выполнения
	cudaStatus = cudaGetLastError();
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
		goto Error;
	}

	cudaDeviceSynchronize();

	double res_next_point[2] = { 0, 0 };
	// Получение результата выполнения
	cudaStatus = cudaMemcpy(res_next_point, dev_next_point, 2 * sizeof(double), cudaMemcpyDeviceToHost);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaMemcpy failed!");
		goto Error;
	}

	next_x1 = res_next_point[0];
	next_x2 = res_next_point[1];

Error:
	cudaFree(dev_x1);
	cudaFree(dev_x2);
	cudaFree(dev_next_point);
	cudaFree(dev_step);

	return cudaStatus;
}

cudaError_t calcGradPoints(double &grad_x1, double &grad_x2, double &grad_next_x1, double &grad_next_x2,
	const double x1, const double x2, const double next_x1, const double next_x2)
{
	double *dev_grads=0;
	double *dev_points=0;

	double points[4] = { x1, x2, next_x1, next_x2 };

	cudaError_t cudaStatus;

	// Проверка на доступность видеокарты для вычислений
	cudaStatus = cudaSetDevice(0);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
		goto Error;
	}

	// Выделение памяти на видеокарте
	cudaMalloc((void**)&dev_grads, 4 * sizeof(double));
	cudaMalloc((void**)&dev_points, 4 * sizeof(double));

	// Отправка на видеокарту значений
	cudaMemcpy(dev_points, points, 4 * sizeof(double), cudaMemcpyHostToDevice);

	kernelCalcGradPoints << <1, 4 >> >(dev_grads, dev_points);

	// Получить возникшие ошибки в процессе выполнения
	cudaStatus = cudaGetLastError();
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
		goto Error;
	}

	cudaDeviceSynchronize();

	double res_grads[4] = { 0, 0, 0, 0 };
	// Получение результата выполнения
	cudaStatus = cudaMemcpy(res_grads, dev_grads, 4 * sizeof(double), cudaMemcpyDeviceToHost);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaMemcpy failed!");
		goto Error;
	}

	grad_x1 = res_grads[0];
	grad_x2 = res_grads[1];
	grad_next_x1 = res_grads[2];
	grad_next_x2 = res_grads[3];

Error:
	cudaFree(dev_grads);
	cudaFree(dev_points);

	return cudaStatus;
}

// общие функции для всех способов
double func(double x1, double x2)
{
	return 3 * pow(x1, 2) + x1 * x2 + 2 * pow(x2, 2) - x1 - 4 * x2;
}

double calc_gradX1(double x1, double x2)
{
	return 6 * x1 + x2 - 1;
}

double calc_gradX2(double x1, double x2)
{
	return x1 + 4 * x2 - 4;
}

double nextX1(double curr_x1, double curr_x2, double step)
{
	return curr_x1 - step * calc_gradX1(curr_x1, curr_x2);
}

double nextX2(double curr_x1, double curr_x2, double step)
{
	return curr_x2 - step * calc_gradX2(curr_x1, curr_x2);
}

double minFounded(double x1, double x2, double next_x1, double next_x2, double eps)
{
	double res = sqrt(pow(next_x1 - x1, 2) + pow(next_x2 - x2, 2));
	return res < eps;
}

double findFirstStep(double x1, double x2, double& next_x1, double& next_x2, double step)
{
	double step_delta = STEP / 2;
	if (func(x1, x2) < func(next_x1, next_x2))
	{
		step -= step_delta;
		next_x1 = nextX1(x1, x2, step);
		next_x2 = nextX2(x1, x2, step);

		if (func(x1, x2) < func(next_x1, next_x2))
		{
			step += 2 * step_delta;
			next_x1 = nextX1(x1, x2, step);
			next_x2 = nextX2(x1, x2, step);

			if (func(x1, x2) < func(next_x1, next_x2))
				return 0;
			else
				return step;
		}
		else
			return step;
	}
	else
		return step;
}

double findNextStep(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
{
	double chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
	double znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
	double cosBetweenVectors = chislitel / znamenatel;
	double corner = acos(cosBetweenVectors) * 180.0 / PI;

	double cornerMin = 30;
	double cornerMax = 90;
	if (corner < cornerMin)
		return 2 * curr_step;
	else if (corner >= cornerMin && corner <= cornerMax)
		return curr_step;
	else
		return curr_step / 3;
}

void cudaOnlyCalcMethod() {
	double x1 = X1;
	double x2 = X2;

	double eps = EPS;
	double step = STEP;

	int maxCountLoop = 100;

	double next_x1 = nextX1(x1, x2, step);
	double next_x2 = nextX2(x1, x2, step);

	step = findFirstStep(x1, x2, next_x1, next_x2, step);
	if (step == 0) {
		step = STEP;
	}

	cout << "Value of current step: " << step << "\n";
	cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";

	double gradX1, gradX2, gradNextX1, gradNextX2;
	for (int k = 1; k < maxCountLoop; k++)
	{
		calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);

		step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
		cout << "Value of current step: " << step << "\n";

		x1 = next_x1;
		x2 = next_x2;

		calcNextPoint(next_x1, next_x2, x1, x2, step);

		// условие нахождения точки минимума
		if (minFounded(x1, x2, next_x1, next_x2, eps))
		{
			cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
			break;
		}

		cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
	};
}

// Вычисление параллельными секциями
double findFirstStep_sections(double x1, double x2, double& next_x1, double& next_x2, double step)
{
	double step_delta = STEP / 2;
	if (func(x1, x2) < func(next_x1, next_x2))
	{
		double func_res1, func_res2, curr_step1, curr_step2,
			step1_next_x1, step1_next_x2, step2_next_x1, step2_next_x2;
#pragma omp parallel sections
		{
#pragma omp section
			{
				curr_step1 = step - step_delta;
				step1_next_x1 = nextX1(x1, x2, curr_step1);
				step1_next_x2 = nextX2(x1, x2, curr_step1);
				func_res1 = func(step1_next_x1, step1_next_x2);
			}
#pragma omp section
			{
				curr_step2 = step + step_delta;
				step2_next_x1 = nextX1(x1, x2, curr_step2);
				step2_next_x2 = nextX2(x1, x2, curr_step2);
				func_res2 = func(step2_next_x1, step2_next_x2);
			}
		}

		double func_old_res = func(x1, x2);
		if (func_res1 < func_old_res && func_res1 < func_res2) {
			next_x1 = step1_next_x1;
			next_x2 = step1_next_x2;
			return curr_step1;
		}
		else if (func_res2 < func_old_res && func_res2 < func_res1) {
			next_x1 = step2_next_x1;
			next_x2 = step2_next_x2;
			return curr_step2;
		}
		else
			return step;
	}
	else
		return step;
}

double findNextStep_sections(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
{
	double chislitel, znamenatel;
#pragma omp parallel sections
	{
#pragma omp section
		{
			chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
		}
#pragma omp section
		{
			znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
		}
	}
#pragma omp barrier

	double cosBetweenVectors = chislitel / znamenatel;
	double corner = acos(cosBetweenVectors) * 180.0 / PI;

	double cornerMin = 30;
	double cornerMax = 90;
	if (corner < cornerMin)
		return 2 * curr_step;
	else if (corner >= cornerMin && corner <= cornerMax)
		return curr_step;
	else
		return curr_step / 3;
}

void cuda_openmp_CalcMethod() {
	double x1 = X1;
	double x2 = X2;

	double eps = EPS;
	double step = STEP;

	int maxCountLoop = 100;
	double next_x1, next_x2;

#pragma omp parallel sections
	{
#pragma omp section
		{
			next_x1 = nextX1(x1, x2, step);
		}
#pragma omp section
		{
			next_x2 = nextX2(x1, x2, step);
		}
	}
#pragma omp barrier

	step = findFirstStep_sections(x1, x2, next_x1, next_x2, step);
	if (step == 0) {
		step = STEP;
	}

	cout << "Value of current step: " << step << "\n";
	cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";

	double gradX1, gradX2, gradNextX1, gradNextX2;
	for (int k = 1; k < maxCountLoop; k++)
	{
		calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);

		step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
		cout << "Value of current step: " << step << "\n";

		x1 = next_x1;
		x2 = next_x2;

		calcNextPoint(next_x1, next_x2, x1, x2, step);

		// условие нахождения точки минимума
		if (minFounded(x1, x2, next_x1, next_x2, eps))
		{
			cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
			break;
		}

		cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
	};
}

int main()
{
	cudaEvent_t start_cuda, stop_cuda;
	float gpuTime;

	cout << "CUDA" << "\n";

	cudaEventCreate(&start_cuda);
	cudaEventCreate(&stop_cuda);
	cudaEventRecord(start_cuda, 0);

	cudaOnlyCalcMethod();

	cudaEventRecord(stop_cuda, 0);
	cudaEventSynchronize(stop_cuda);
	cudaEventElapsedTime(&gpuTime, start_cuda, stop_cuda);
	printf("on Host: \t%.4f \n", gpuTime);
	//cout << "Time: " << gpuTime << "\n\n";

	cudaEvent_t start_cuda_openmp, stop_cuda_openmp;
	//gpuTime = 0;

	cout << "CUDA + OpenMP" << "\n";

	cudaEventCreate(&start_cuda_openmp);
	cudaEventCreate(&stop_cuda_openmp);
	cudaEventRecord(start_cuda_openmp, 0);


	cuda_openmp_CalcMethod();

	cudaEventRecord(stop_cuda_openmp, 0);
	cudaEventSynchronize(stop_cuda_openmp);
	cudaEventElapsedTime(&gpuTime, start_cuda_openmp, stop_cuda_openmp);

	cout << "Time: " << gpuTime << "\n\n";

	//Ожидаем ввод, чтобы не скинулась консоль, рандомная переменная
	cin >> gpuTime;
	return 0;
};