Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.81 KB | None | 0 0
  1.  
  2. #include "cuda_runtime.h"
  3. #include "device_launch_parameters.h"
  4.  
  5. #include <stdio.h>
  6.  
  7. #include <iostream>
  8. #include <cmath>
  9. #include <iomanip>
  10. #include <fstream>
  11. #include <omp.h>
  12.  
  13. #define PI 3.14159265
  14. #define STEP 0.1
  15. #define X1 2
  16. #define X2 3
  17. #define EPS 0.001
  18.  
  19. using namespace std;
  20.  
  21. __global__ void kernelCalcNextPoint(double *next_point, const double *x1, const double *x2, const double *step);
  22. __global__ void kernelCalcGradPoints(double *grads, double *points);
  23. __device__ double dev_calc_gradX1(double x1, double x2);
  24. __device__ double dev_calc_gradX2(double x1, double x2);
  25. __device__ double dev_nextX1(double curr_x1, double curr_x2, double step);
  26. __device__ double dev_nextX2(double curr_x1, double curr_x2, double step);
  27.  
  28.  
  29. __global__ void kernelCalcNextPoint(double *next_point, const double * x1, const double* x2, const double* step)
  30. {
  31. if (threadIdx.x == 0) {
  32. next_point[0] = dev_nextX1(x1[0], x2[0], step[0]);
  33. }
  34. else {
  35. next_point[1] = dev_nextX2(x1[0], x2[0], step[0]);
  36. }
  37.  
  38. }
  39.  
  40. __global__ void kernelCalcGradPoints(double *grads, double *points)
  41. {
  42. int i = threadIdx.x;
  43. if ((i + 1) / 2 == 0) {
  44. grads[i] = dev_calc_gradX1(points[i], points[i + 1]);
  45. }
  46. else {
  47. grads[i] = dev_calc_gradX2(points[i - 1], points[i]);
  48. }
  49. }
  50.  
  51. __device__ double dev_calc_gradX1(double x1, double x2)
  52. {
  53. return 6 * x1 + x2 - 1;
  54. }
  55.  
  56. __device__ double dev_calc_gradX2(double x1, double x2)
  57. {
  58. return x1 + 4 * x2 - 4;
  59. }
  60.  
  61. __device__ double dev_nextX1(double curr_x1, double curr_x2, double step)
  62. {
  63. return curr_x1 - step * (6 * curr_x1 + curr_x2 - 1);
  64. }
  65.  
  66. __device__ double dev_nextX2(double curr_x1, double curr_x2, double step)
  67. {
  68. return curr_x2 - step * (curr_x1 + 4 * curr_x2 - 4);
  69. }
  70.  
  71. cudaError_t calcNextPoint(double &next_x1, double &next_x2, const double x1, const double x2, const double step)
  72. {
  73. double *dev_x1=0;
  74. double *dev_x2=0;
  75. double *dev_next_point=0;
  76. double *dev_step=0;
  77.  
  78. double arr_x1[1] = { x1 };
  79. double arr_x2[1] = { x2 };
  80. double arr_step[1] = { step };
  81.  
  82. cudaError_t cudaStatus;
  83.  
  84. // Проверка на доступность видеокарты для вычислений
  85. cudaStatus = cudaSetDevice(0);
  86. if (cudaStatus != cudaSuccess) {
  87. fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
  88. goto Error;
  89. }
  90.  
  91. // Выделение памяти на видеокарте
  92. cudaMalloc((void**)&dev_x1, sizeof(double));
  93. cudaMalloc((void**)&dev_x2, sizeof(double));
  94. cudaMalloc((void**)&dev_next_point, 2 * sizeof(double));
  95. cudaMalloc((void**)&dev_step, sizeof(double));
  96.  
  97. // Отправка на видеокарту значений
  98. cudaMemcpy(dev_x1, &arr_x1, sizeof(double), cudaMemcpyHostToDevice);
  99. cudaMemcpy(dev_x2, &arr_x2, sizeof(double), cudaMemcpyHostToDevice);
  100. cudaMemcpy(dev_step, &arr_step, sizeof(double), cudaMemcpyHostToDevice);
  101.  
  102. kernelCalcNextPoint << <1, 2 >> >(dev_next_point, dev_x1, dev_x2, dev_step);
  103.  
  104. // Получить возникшие ошибки в процессе выполнения
  105. cudaStatus = cudaGetLastError();
  106. if (cudaStatus != cudaSuccess) {
  107. fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
  108. goto Error;
  109. }
  110.  
  111. cudaDeviceSynchronize();
  112.  
  113. double res_next_point[2] = { 0, 0 };
  114. // Получение результата выполнения
  115. cudaStatus = cudaMemcpy(res_next_point, dev_next_point, 2 * sizeof(double), cudaMemcpyDeviceToHost);
  116. if (cudaStatus != cudaSuccess) {
  117. fprintf(stderr, "cudaMemcpy failed!");
  118. goto Error;
  119. }
  120.  
  121. next_x1 = res_next_point[0];
  122. next_x2 = res_next_point[1];
  123.  
  124. Error:
  125. cudaFree(dev_x1);
  126. cudaFree(dev_x2);
  127. cudaFree(dev_next_point);
  128. cudaFree(dev_step);
  129.  
  130. return cudaStatus;
  131. }
  132.  
  133. cudaError_t calcGradPoints(double &grad_x1, double &grad_x2, double &grad_next_x1, double &grad_next_x2,
  134. const double x1, const double x2, const double next_x1, const double next_x2)
  135. {
  136. double *dev_grads=0;
  137. double *dev_points=0;
  138.  
  139. double points[4] = { x1, x2, next_x1, next_x2 };
  140.  
  141. cudaError_t cudaStatus;
  142.  
  143. // Проверка на доступность видеокарты для вычислений
  144. cudaStatus = cudaSetDevice(0);
  145. if (cudaStatus != cudaSuccess) {
  146. fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
  147. goto Error;
  148. }
  149.  
  150. // Выделение памяти на видеокарте
  151. cudaMalloc((void**)&dev_grads, 4 * sizeof(double));
  152. cudaMalloc((void**)&dev_points, 4 * sizeof(double));
  153.  
  154. // Отправка на видеокарту значений
  155. cudaMemcpy(dev_points, points, 4 * sizeof(double), cudaMemcpyHostToDevice);
  156.  
  157. kernelCalcGradPoints << <1, 4 >> >(dev_grads, dev_points);
  158.  
  159. // Получить возникшие ошибки в процессе выполнения
  160. cudaStatus = cudaGetLastError();
  161. if (cudaStatus != cudaSuccess) {
  162. fprintf(stderr, "kernel func launch failed: %s\n", cudaGetErrorString(cudaStatus));
  163. goto Error;
  164. }
  165.  
  166. cudaDeviceSynchronize();
  167.  
  168. double res_grads[4] = { 0, 0, 0, 0 };
  169. // Получение результата выполнения
  170. cudaStatus = cudaMemcpy(res_grads, dev_grads, 4 * sizeof(double), cudaMemcpyDeviceToHost);
  171. if (cudaStatus != cudaSuccess) {
  172. fprintf(stderr, "cudaMemcpy failed!");
  173. goto Error;
  174. }
  175.  
  176. grad_x1 = res_grads[0];
  177. grad_x2 = res_grads[1];
  178. grad_next_x1 = res_grads[2];
  179. grad_next_x2 = res_grads[3];
  180.  
  181. Error:
  182. cudaFree(dev_grads);
  183. cudaFree(dev_points);
  184.  
  185. return cudaStatus;
  186. }
  187.  
  188. // общие функции для всех способов
  189. double func(double x1, double x2)
  190. {
  191. return 3 * pow(x1, 2) + x1 * x2 + 2 * pow(x2, 2) - x1 - 4 * x2;
  192. }
  193.  
  194. double calc_gradX1(double x1, double x2)
  195. {
  196. return 6 * x1 + x2 - 1;
  197. }
  198.  
  199. double calc_gradX2(double x1, double x2)
  200. {
  201. return x1 + 4 * x2 - 4;
  202. }
  203.  
  204. double nextX1(double curr_x1, double curr_x2, double step)
  205. {
  206. return curr_x1 - step * calc_gradX1(curr_x1, curr_x2);
  207. }
  208.  
  209. double nextX2(double curr_x1, double curr_x2, double step)
  210. {
  211. return curr_x2 - step * calc_gradX2(curr_x1, curr_x2);
  212. }
  213.  
  214. double minFounded(double x1, double x2, double next_x1, double next_x2, double eps)
  215. {
  216. double res = sqrt(pow(next_x1 - x1, 2) + pow(next_x2 - x2, 2));
  217. return res < eps;
  218. }
  219.  
  220. double findFirstStep(double x1, double x2, double& next_x1, double& next_x2, double step)
  221. {
  222. double step_delta = STEP / 2;
  223. if (func(x1, x2) < func(next_x1, next_x2))
  224. {
  225. step -= step_delta;
  226. next_x1 = nextX1(x1, x2, step);
  227. next_x2 = nextX2(x1, x2, step);
  228.  
  229. if (func(x1, x2) < func(next_x1, next_x2))
  230. {
  231. step += 2 * step_delta;
  232. next_x1 = nextX1(x1, x2, step);
  233. next_x2 = nextX2(x1, x2, step);
  234.  
  235. if (func(x1, x2) < func(next_x1, next_x2))
  236. return 0;
  237. else
  238. return step;
  239. }
  240. else
  241. return step;
  242. }
  243. else
  244. return step;
  245. }
  246.  
  247. double findNextStep(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
  248. {
  249. double chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
  250. double znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
  251. double cosBetweenVectors = chislitel / znamenatel;
  252. double corner = acos(cosBetweenVectors) * 180.0 / PI;
  253.  
  254. double cornerMin = 30;
  255. double cornerMax = 90;
  256. if (corner < cornerMin)
  257. return 2 * curr_step;
  258. else if (corner >= cornerMin && corner <= cornerMax)
  259. return curr_step;
  260. else
  261. return curr_step / 3;
  262. }
  263.  
  264. void cudaOnlyCalcMethod() {
  265. double x1 = X1;
  266. double x2 = X2;
  267.  
  268. double eps = EPS;
  269. double step = STEP;
  270.  
  271. int maxCountLoop = 100;
  272.  
  273. double next_x1 = nextX1(x1, x2, step);
  274. double next_x2 = nextX2(x1, x2, step);
  275.  
  276. step = findFirstStep(x1, x2, next_x1, next_x2, step);
  277. if (step == 0) {
  278. step = STEP;
  279. }
  280.  
  281. cout << "Value of current step: " << step << "\n";
  282. cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";
  283.  
  284. double gradX1, gradX2, gradNextX1, gradNextX2;
  285. for (int k = 1; k < maxCountLoop; k++)
  286. {
  287. calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);
  288.  
  289. step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
  290. cout << "Value of current step: " << step << "\n";
  291.  
  292. x1 = next_x1;
  293. x2 = next_x2;
  294.  
  295. calcNextPoint(next_x1, next_x2, x1, x2, step);
  296.  
  297. // условие нахождения точки минимума
  298. if (minFounded(x1, x2, next_x1, next_x2, eps))
  299. {
  300. cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
  301. break;
  302. }
  303.  
  304. cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
  305. };
  306. }
  307.  
  308. // Вычисление параллельными секциями
  309. double findFirstStep_sections(double x1, double x2, double& next_x1, double& next_x2, double step)
  310. {
  311. double step_delta = STEP / 2;
  312. if (func(x1, x2) < func(next_x1, next_x2))
  313. {
  314. double func_res1, func_res2, curr_step1, curr_step2,
  315. step1_next_x1, step1_next_x2, step2_next_x1, step2_next_x2;
  316. #pragma omp parallel sections
  317. {
  318. #pragma omp section
  319. {
  320. curr_step1 = step - step_delta;
  321. step1_next_x1 = nextX1(x1, x2, curr_step1);
  322. step1_next_x2 = nextX2(x1, x2, curr_step1);
  323. func_res1 = func(step1_next_x1, step1_next_x2);
  324. }
  325. #pragma omp section
  326. {
  327. curr_step2 = step + step_delta;
  328. step2_next_x1 = nextX1(x1, x2, curr_step2);
  329. step2_next_x2 = nextX2(x1, x2, curr_step2);
  330. func_res2 = func(step2_next_x1, step2_next_x2);
  331. }
  332. }
  333.  
  334. double func_old_res = func(x1, x2);
  335. if (func_res1 < func_old_res && func_res1 < func_res2) {
  336. next_x1 = step1_next_x1;
  337. next_x2 = step1_next_x2;
  338. return curr_step1;
  339. }
  340. else if (func_res2 < func_old_res && func_res2 < func_res1) {
  341. next_x1 = step2_next_x1;
  342. next_x2 = step2_next_x2;
  343. return curr_step2;
  344. }
  345. else
  346. return step;
  347. }
  348. else
  349. return step;
  350. }
  351.  
  352. double findNextStep_sections(double gradX1, double gradX2, double gradNextX1, double gradNextX2, double curr_step)
  353. {
  354. double chislitel, znamenatel;
  355. #pragma omp parallel sections
  356. {
  357. #pragma omp section
  358. {
  359. chislitel = (gradNextX1 * gradX1 + gradNextX2 * gradX2);
  360. }
  361. #pragma omp section
  362. {
  363. znamenatel = sqrt(pow(gradNextX1, 2) + pow(gradNextX2, 2)) * sqrt(pow(gradX1, 2) + pow(gradX2, 2));
  364. }
  365. }
  366. #pragma omp barrier
  367.  
  368. double cosBetweenVectors = chislitel / znamenatel;
  369. double corner = acos(cosBetweenVectors) * 180.0 / PI;
  370.  
  371. double cornerMin = 30;
  372. double cornerMax = 90;
  373. if (corner < cornerMin)
  374. return 2 * curr_step;
  375. else if (corner >= cornerMin && corner <= cornerMax)
  376. return curr_step;
  377. else
  378. return curr_step / 3;
  379. }
  380.  
  381. void cuda_openmp_CalcMethod() {
  382. double x1 = X1;
  383. double x2 = X2;
  384.  
  385. double eps = EPS;
  386. double step = STEP;
  387.  
  388. int maxCountLoop = 100;
  389. double next_x1, next_x2;
  390.  
  391. #pragma omp parallel sections
  392. {
  393. #pragma omp section
  394. {
  395. next_x1 = nextX1(x1, x2, step);
  396. }
  397. #pragma omp section
  398. {
  399. next_x2 = nextX2(x1, x2, step);
  400. }
  401. }
  402. #pragma omp barrier
  403.  
  404. step = findFirstStep_sections(x1, x2, next_x1, next_x2, step);
  405. if (step == 0) {
  406. step = STEP;
  407. }
  408.  
  409. cout << "Value of current step: " << step << "\n";
  410. cout << fixed << "Point value on iteration " << 0 << ": " << next_x1 << ", " << next_x2 << "\n";
  411.  
  412. double gradX1, gradX2, gradNextX1, gradNextX2;
  413. for (int k = 1; k < maxCountLoop; k++)
  414. {
  415. calcGradPoints(gradX1, gradX2, gradNextX1, gradNextX2, x1, x2, next_x1, next_x2);
  416.  
  417. step = findNextStep(gradX1, gradX2, gradNextX1, gradNextX2, step);
  418. cout << "Value of current step: " << step << "\n";
  419.  
  420. x1 = next_x1;
  421. x2 = next_x2;
  422.  
  423. calcNextPoint(next_x1, next_x2, x1, x2, step);
  424.  
  425. // условие нахождения точки минимума
  426. if (minFounded(x1, x2, next_x1, next_x2, eps))
  427. {
  428. cout << fixed << "Min point of func:" << next_x1 << "; " << next_x2 << "\n";
  429. break;
  430. }
  431.  
  432. cout << fixed << "Point value on iteration " << k << ": " << next_x1 << "; " << next_x2 << "\n";
  433. };
  434. }
  435.  
  436. int main()
  437. {
  438. cudaEvent_t start_cuda, stop_cuda;
  439. float gpuTime;
  440.  
  441. cout << "CUDA" << "\n";
  442.  
  443. cudaEventCreate(&start_cuda);
  444. cudaEventCreate(&stop_cuda);
  445. cudaEventRecord(start_cuda, 0);
  446.  
  447. cudaOnlyCalcMethod();
  448.  
  449. cudaEventRecord(stop_cuda, 0);
  450. cudaEventSynchronize(stop_cuda);
  451. cudaEventElapsedTime(&gpuTime, start_cuda, stop_cuda);
  452. printf("on Host: \t%.4f \n", gpuTime);
  453. //cout << "Time: " << gpuTime << "\n\n";
  454.  
  455. cudaEvent_t start_cuda_openmp, stop_cuda_openmp;
  456. //gpuTime = 0;
  457.  
  458. cout << "CUDA + OpenMP" << "\n";
  459.  
  460. cudaEventCreate(&start_cuda_openmp);
  461. cudaEventCreate(&stop_cuda_openmp);
  462. cudaEventRecord(start_cuda_openmp, 0);
  463.  
  464.  
  465. cuda_openmp_CalcMethod();
  466.  
  467. cudaEventRecord(stop_cuda_openmp, 0);
  468. cudaEventSynchronize(stop_cuda_openmp);
  469. cudaEventElapsedTime(&gpuTime, start_cuda_openmp, stop_cuda_openmp);
  470.  
  471. cout << "Time: " << gpuTime << "\n\n";
  472.  
  473. //Ожидаем ввод, чтобы не скинулась консоль, рандомная переменная
  474. cin >> gpuTime;
  475. return 0;
  476. };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement