Advertisement
manac68974

Untitled

Dec 18th, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 29.92 KB | None | 0 0
#include <mpi.h>
#include <omp.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27.  
  28. vector<double> internal_data;
  29. vector<double> old_internal_data;
  30. vector<double> external_data[4];
  31.  
  32. vector <vector <double>> coefs;
  33.  
  34. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  35.  
  36. //corners
  37. //left bottom corner
  38. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  39. return 2;
  40. }
  41.  
  42. //left top corner
  43. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  44. return 4;
  45. }
  46.  
  47. //right bottom corner
  48. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  49. return 6;
  50. }
  51.  
  52. //right top corner
  53. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  54. return 8;
  55. }
  56.  
  57. //sides
  58. //left side
  59. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  60. current_node_global_offset_x == 0){
  61. return 1;
  62. }
  63.  
  64. //right side
  65. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  66. current_node_global_offset_x == M){
  67. return 3;
  68. }
  69.  
  70. //bottom side
  71. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  72. current_node_global_offset_y == 0){
  73. return 5;
  74. }
  75.  
  76. //top side
  77. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  78. current_node_global_offset_y == N)){
  79. return 7;
  80. }
  81.  
  82. //internal
  83. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  84. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  85. return 0;
  86. }
  87.  
  88. return -1;
  89. }
  90.  
  91. double k(double x, double y) {
  92. return 1 + pow(x + y, 2);
  93. }
  94.  
  95. double q(double x, double y) {
  96. return 1;
  97. }
  98.  
  99. double u(double x, double y) {
  100. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  101. }
  102.  
  103. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  104. double psi_R(double y) {
  105. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  106. }
  107.  
  108. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  109. double psi_L(double y) {
  110. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  111. }
  112.  
  113. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  114. double psi_T(double x) {
  115. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  116. }
  117.  
  118. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  119. double psi_B(double x) {
  120. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  121. }
  122.  
  123. // right-part function of Poisson equation
  124. double F(double x, double y) {
  125. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  126. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  127. }
  128.  
  129. //inner_product(A[i], internal_data)
  130.  
  131. //inner_product(A[i], internal_data)
  132. void ComputeCoefsForNode (double &left_coeff, double &right_coeff, double &bottom_coeff, double &top_coeff, double &this_coeff, int i, int j){
  133.  
  134. int glob_x = GetGlobalX(i);
  135. int glob_y = GetGlobalY(j);
  136.  
  137. map <string,bool> neighbours = {
  138. {"left", true},
  139. {"right", true},
  140. {"bottom", true},
  141. {"top", true}
  142. };
  143.  
  144. left_coeff = 1.0;
  145. right_coeff = 1.0;
  146. bottom_coeff = 1.0;
  147. top_coeff = 1.0;
  148. this_coeff = 1.0;
  149.  
  150. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  151. case 2:
  152. //left bottom corner
  153. neighbours["left"] = false;
  154. neighbours["bottom"] = false;
  155. break;
  156. case 4:
  157. //left top corner
  158. neighbours["left"] = false;
  159. neighbours["top"] = false;
  160. break;
  161. case 6:
  162. //right bottom corner
  163. neighbours["right"] = false;
  164. neighbours["bottom"] = false;
  165. break;
  166. case 8:
  167. //right top corner
  168. neighbours["right"] = false;
  169. neighbours["top"] = false;
  170. break;
  171. case 1:
  172. //left side
  173. neighbours["left"] = false;
  174. break;
  175. case 3:
  176. //right side
  177. neighbours["right"] = false;
  178. break;
  179. case 5:
  180. //bottom side
  181. neighbours["bottom"] = false;
  182. break;
  183. case 7:
  184. //top side
  185. neighbours["top"] = false;
  186. break;
  187. case 0:
  188. //internal
  189. break;
  190. default:
  191. cout << "[ERROR]: Bad global coords compute coef. Global:" << glob_x << " " << glob_y<<endl;
  192. }
  193.  
  194. if (!neighbours["left"]){
  195. right_coeff = 2.0;
  196. left_coeff = 0.0;
  197. }
  198.  
  199. if (!neighbours["right"]){
  200. left_coeff = 2.0;
  201. right_coeff = 0.0;
  202. }
  203.  
  204. if (!neighbours["bottom"]){
  205. top_coeff = 2.0;
  206. bottom_coeff = 0.0;
  207. }
  208.  
  209. if (!neighbours["top"]){
  210. bottom_coeff = 2.0;
  211. top_coeff = 0.0;
  212. }
  213.  
  214. if (neighbours["left"]){
  215. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  216. }
  217.  
  218. if (neighbours["right"]){
  219. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  220. }
  221.  
  222. if (neighbours["bottom"]){
  223. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  224. }
  225.  
  226. if (neighbours["top"]){
  227. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  228. }
  229.  
  230. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  231.  
  232. /*coefs[0] = left_coeff;
  233. coefs[1] = right_coeff;
  234. coefs[2] = bottom_coeff;
  235. coefs[3] = top_coeff;
  236. coefs[4] = this_coeff;*/
  237. }
  238.  
  239. double ComputeMagicInnerProductA_iw (int i, int j){
  240.  
  241. int glob_x = GetGlobalX(i);
  242. int glob_y = GetGlobalY(j);
  243.  
  244. double result = 0.0;
  245.  
  246. map <string,bool> neighbours = {
  247. {"left", true},
  248. {"right", true},
  249. {"bottom", true},
  250. {"top", true}
  251. };
  252.  
  253. vector <double> my_coefs = coefs[j + cur_block_size_y*i];
  254.  
  255. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  256. double left_coeff = my_coefs[0], right_coeff = my_coefs[1], bottom_coeff = my_coefs[2], top_coeff = my_coefs[3], this_coeff = my_coefs[4];
  257.  
  258. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  259. case 2:
  260. //left bottom corner
  261. neighbours["left"] = false;
  262. neighbours["bottom"] = false;
  263. break;
  264. case 4:
  265. //left top corner
  266. neighbours["left"] = false;
  267. neighbours["top"] = false;
  268. break;
  269. case 6:
  270. //right bottom corner
  271. neighbours["right"] = false;
  272. neighbours["bottom"] = false;
  273. break;
  274. case 8:
  275. //right top corner
  276. neighbours["right"] = false;
  277. neighbours["top"] = false;
  278. break;
  279. case 1:
  280. //left side
  281. neighbours["left"] = false;
  282. break;
  283. case 3:
  284. //right side
  285. neighbours["right"] = false;
  286. break;
  287. case 5:
  288. //bottom side
  289. neighbours["bottom"] = false;
  290. break;
  291. case 7:
  292. //top side
  293. neighbours["top"] = false;
  294. break;
  295. case 0:
  296. //internal
  297. break;
  298. default:
  299. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  300. }
  301.  
  302.  
  303.  
  304. if (neighbours["left"]){
  305. left_neighbour = Get(glob_x - 1, glob_y);
  306. }
  307.  
  308. if (neighbours["right"]){
  309. right_neighbour = Get(glob_x + 1, glob_y);
  310. }
  311.  
  312. if (neighbours["bottom"]){
  313. bottom_neighbour = Get(glob_x, glob_y - 1);
  314. }
  315.  
  316. if (neighbours["top"]){
  317. top_neighbour = Get(glob_x, glob_y + 1);
  318. }
  319.  
  320. this_node = Get(glob_x, glob_y);
  321.  
  322. result = left_coeff * left_neighbour +
  323. right_coeff * right_neighbour +
  324. bottom_coeff * bottom_neighbour +
  325. top_coeff * top_neighbour +
  326. this_coeff * this_node;
  327.  
  328. return result;
  329. }
  330.  
  331. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  332.  
  333. int glob_x = current_node_global_offset_x;
  334. int glob_y = current_node_global_offset_y;
  335.  
  336. double result = 0.0;
  337.  
  338. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  339. case 2:
  340. //left bottom corner
  341. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  342. break;
  343. case 4:
  344. //left top corner
  345. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  346. break;
  347. case 6:
  348. //right bottom corner
  349. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  350. break;
  351. case 8:
  352. //right top corner
  353. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  354. break;
  355. case 1:
  356. //left side
  357. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  358. break;
  359. case 3:
  360. //right side
  361. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  362. break;
  363. case 5:
  364. //bottom side
  365. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  366. break;
  367. case 7:
  368. //top side
  369. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  370. break;
  371. case 0:
  372. //internal
  373. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  374. break;
  375. default:
  376. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  377.  
  378. }
  379.  
  380. return result;
  381.  
  382. }
  383.  
  384. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  385.  
  386. int glob_x = current_node_global_offset_x;
  387. int glob_y = current_node_global_offset_y;
  388.  
  389. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  390.  
  391. }
  392.  
  393. void ComputeMatrixR(){
  394. /*if(my_rank == 0)
  395. cout << "[INFO]: Computation of matrix r started"<<endl;
  396. */
  397.  
  398. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  399.  
  400. //#pragma omp parallel for
  401. for(int i = 0; i < cur_block_size_x; ++i){
  402. for(int j = 0; j < cur_block_size_y; ++j){
  403.  
  404. int current_node_global_offset_x = GetGlobalX(i),
  405. current_node_global_offset_y = GetGlobalY(j);
  406.  
  407. int glob_x = current_node_global_offset_x,
  408. glob_y = current_node_global_offset_y;
  409.  
  410. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(i,j) - GetNodeFromB(glob_x, glob_y);
  411.  
  412. }
  413. }
  414.  
  415. //#pragma omp parallel for
  416. for(int i = 0; i < cur_block_size_x; ++i){
  417. for(int j = 0; j < cur_block_size_y; ++j){
  418.  
  419. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  420. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  421.  
  422. }
  423. }
  424.  
  425. //old_internal_data = internal_data;
  426. //internal_data = r_tmp_matrix;
  427.  
  428. SyncMPI();
  429.  
  430. }
  431.  
  432. double ComputeTauAndStopCase(bool &should_i_stop){
  433.  
  434. double local_Ar_r_inner_product_sum = 0.0;
  435. double local_Ar_Ar_inner_product_sum = 0.0;
  436. double global_Ar_r_inner_product_sum = 0.0;
  437. double global_Ar_Ar_inner_product_sum = 0.0;
  438. double local_r_norm = 0.0;
  439. double global_r_norm = 0.0;
  440.  
  441. //#pragma omp parallel for
  442. for(int i = 0; i < cur_block_size_x; ++i){
  443. for(int j = 0; j < cur_block_size_y; ++j){
  444. double rho = 1.0;
  445.  
  446. int current_node_global_offset_x = GetGlobalX(i),
  447. current_node_global_offset_y = GetGlobalY(j);
  448.  
  449. int glob_x = current_node_global_offset_x,
  450. glob_y = current_node_global_offset_y;
  451.  
  452. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(i, j);
  453.  
  454. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  455. case 2:
  456. case 4:
  457. case 6:
  458. case 8:
  459. //angle
  460. rho = 0.25;
  461. break;
  462. case 1:
  463. case 3:
  464. case 5:
  465. case 7:
  466. //side
  467. rho = 0.5;
  468. break;
  469. case 0:
  470. //internal
  471. rho = 1.0;
  472. break;
  473. default:
  474. cout << "[ERROR]: Bad global coords compute tau. Global:" << glob_x << " " << glob_y << endl;
  475. }
  476.  
  477. double tmp_cur_node_value = Get(glob_x, glob_y);
  478.  
  479. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  480. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  481. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  482.  
  483. }
  484. }
  485.  
  486. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  487.  
  488. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  489. comm);
  490.  
  491. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  492.  
  493. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  494. MPI_COMM_WORLD);
  495.  
  496. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  497.  
  498. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  499.  
  500. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  501. MPI_COMM_WORLD);
  502.  
  503. double eps = 1e-06;
  504.  
  505. if (global_r_norm < 0){
  506. cout << "[ERROR]: bad global r norm" << endl;
  507. }
  508.  
  509. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  510.  
  511. //if (my_rank == 0)
  512. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  513.  
  514. if (current_norm <= eps){
  515. if (my_rank == 0)
  516. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  517. should_i_stop = true;
  518. }else{
  519. should_i_stop = false;
  520. }
  521.  
  522. return global_tau;
  523.  
  524. }
  525.  
  526.  
  527. void ComputeNewW(double tau){
  528. //#pragma omp parallel for
  529. for(int i = 0; i < cur_block_size_x; ++i){
  530. for(int j = 0; j < cur_block_size_y; ++j){
  531. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  532. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  533. }
  534. }
  535. }
  536.  
  537. int GetGlobalX(int i){
  538. return cur_block_global_offset_x + i;
  539. }
  540.  
  541. int GetGlobalY(int j){
  542. return cur_block_global_offset_y + j;
  543. }
  544.  
  545. public:
  546. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  547.  
  548. M = inpM;
  549. N = inpN;
  550.  
  551. Gx = inpGx;
  552. Gy = inpGy;
  553.  
  554. x_left = inpx_left;
  555. x_right = inpx_right;
  556. y_bottom = inpy_bottom;
  557. y_top = inpy_top;
  558.  
  559. h_x = double((x_right - x_left)) / M;
  560. h_y = double((y_top - y_bottom)) / N;
  561.  
  562. my_rank = inprank;
  563. comm = inpcomm;
  564.  
  565. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  566. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  567.  
  568. global_block_size_x = (M + 1) / Gx;
  569. global_block_size_y = (N + 1) / Gy;
  570.  
  571. cur_block_size_x = global_block_size_x;
  572. cur_block_size_y = global_block_size_y;
  573.  
  574. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  575. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  576.  
  577. //if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  578. if(cur_block_global_coors_x == Gx - 1 ){
  579. cur_block_size_x += (M + 1) % Gx;
  580. }
  581.  
  582. //if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  583. if(cur_block_global_coors_y == Gy - 1){
  584. cur_block_size_y += (N + 1) % Gy;
  585. }
  586.  
  587. internal_data.resize(cur_block_size_x * cur_block_size_y);
  588. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  589.  
  590.  
  591.  
  592. coefs(cur_block_size_x * cur_block_size_y, vector<double>(5, 0.0));
  593.  
  594. for(int i = 0; i < cur_block_size_x; i++){
  595. for (int j = 0; j < cur_block_size_y; j++){
  596. vector <double> my_coefs = coefs[j + cur_block_size_y*i];
  597. ComputeCoefsForNode(my_coefs[0], my_coefs[1], my_coefs[2], my_coefs[3], my_coefs[4], i, j)
  598. }
  599. }
  600.  
  601. //OX
  602. external_data[0].resize(cur_block_size_y);
  603. external_data[1].resize(cur_block_size_y);
  604.  
  605. //OY
  606. external_data[2].resize(cur_block_size_x);
  607. external_data[3].resize(cur_block_size_x);
  608.  
  609. }
  610.  
  611. double Get(int i, int j) {
  612. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  613. }
  614.  
  615. void Set(int i, int j, double v) {
  616. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  617. }
  618.  
  619. void SyncMPI(){
  620.  
  621. /*if(my_rank == 0)
  622. cout << "[INFO]: Sync started"<< endl;
  623. */
  624.  
  625. //MPI_Barrier(MPI_COMM_WORLD);
  626.  
  627. //left and right sides
  628. //#pragma omp parallel for
  629. for(int j = 0; j < cur_block_size_y; ++j){
  630.  
  631. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  632. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  633.  
  634. }
  635.  
  636. //bottom and top sides
  637. //#pragma omp parallel for
  638. for(int i = 0; i < cur_block_size_x; ++i){
  639.  
  640. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  641. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  642.  
  643. }
  644.  
  645. int my_coords[2];
  646. int targets_ranks[4];
  647.  
  648. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  649.  
  650. int neighbour_offsets[ 4 ][ 2 ] = {
  651. { -1, 0 },{ 1, 0 },
  652. { 0, -1 },{ 0, 1 }
  653. };
  654.  
  655. //#pragma omp parallel for
  656. for(int i = 0; i < 4; i++){
  657.  
  658. int target_coords[2];
  659.  
  660. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  661. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  662.  
  663. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  664.  
  665. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  666.  
  667. }
  668. else{
  669. targets_ranks[i] = -1;
  670. }
  671. }
  672.  
  673. //Now we have rank for all targets
  674.  
  675. for(int axis = 0; axis < 2; axis++){
  676.  
  677. int parity_bit = (my_coords[ axis ]) % 2;
  678.  
  679. //if parity_bit == 0 then
  680. // zuerst mit links, dann mit rechts tauschen
  681. //elif parity_bit == 1:
  682. // zuerst mit rechts, dann mit links tauschen
  683.  
  684. for(int tmp = 0; tmp < 2; tmp++){
  685. parity_bit = 1 - parity_bit;
  686.  
  687. //target id in external_data und targets_ranks
  688. int target_idx = 2 * axis + (1 - parity_bit);
  689.  
  690. if (targets_ranks[target_idx] != -1){
  691.  
  692. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  693. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  694.  
  695. MPI_Status tmp_status;
  696.  
  697. if(my_rank != targets_ranks[ target_idx ]){
  698.  
  699. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  700. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  701. comm, &tmp_status);
  702.  
  703. }
  704. }
  705. }
  706. }
  707.  
  708. //MPI_Barrier(MPI_COMM_WORLD);
  709.  
  710. }
  711.  
  712. void DoIteration(bool &should_i_stop){
  713.  
  714. ComputeMatrixR();
  715.  
  716. //Now R Matrix is in internal_data
  717.  
  718. double tau = ComputeTauAndStopCase(should_i_stop);
  719.  
  720. //We have in our block jetzt:
  721. //in internal_data: R Matrix
  722. //in old_internal_data: w aus letzte iteration
  723. //and we have tau
  724. //jetzt koennen wir naechste w finden
  725.  
  726. ComputeNewW(tau);
  727.  
  728. SyncMPI();
  729.  
  730. }
  731.  
  732. double GetLocalIndex(int i, int j){
  733. //internal data
  734. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  735. return internal_data[ j + cur_block_size_y*i ];
  736. }
  737.  
  738. //external data
  739. //OX
  740. if((j >= 0) && (j < cur_block_size_y)){
  741.  
  742. if (i == -1)
  743. return external_data[ 0 ][ j ];
  744.  
  745. if (i == cur_block_size_x)
  746. return external_data[ 1 ][ j ];
  747.  
  748. }
  749.  
  750. //OY
  751. if((i >= 0) && (i < cur_block_size_x)){
  752.  
  753. if (j == -1)
  754. return external_data[ 2 ][ i ];
  755. if (j == cur_block_size_y)
  756. return external_data[ 3 ][ i ];
  757.  
  758. }
  759.  
  760. cout << "[ERROR]: bad local index" << endl;
  761.  
  762. return nan("");
  763. }
  764.  
  765. void SetLocalIndex(int i, int j, double v){
  766. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  767. internal_data[ j + cur_block_size_y*i ] = v;
  768. }else{
  769. cout << "[ERROR]: trying to set data outside the local area" << endl;
  770. }
  771. }
  772.  
  773. double CompareWithExact() {
  774.  
  775. double local_diff_norm = 0.0;
  776. double global_diff_norm = 0.0;
  777. /*
  778. if (my_rank == 0)
  779. cout << "[INFO]: Starting computing compare with exact" << endl;
  780. */
  781. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  782.  
  783. //#pragma omp parallel for
  784. for (int i = 0; i < cur_block_size_x; ++i){
  785. for (int j = 0; j < cur_block_size_y; ++j){
  786.  
  787.  
  788. int current_node_global_offset_x = GetGlobalX(i),
  789. current_node_global_offset_y = GetGlobalY(j);
  790.  
  791. int glob_x = current_node_global_offset_x,
  792. glob_y = current_node_global_offset_y;
  793.  
  794. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  795.  
  796. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  797.  
  798. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  799.  
  800. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  801.  
  802. }
  803. }
  804.  
  805. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  806.  
  807.  
  808. for (int i = 0; i < cur_block_size_x; ++i){
  809. for (int j = 0; j < cur_block_size_y; ++j){
  810.  
  811. double rho = 1.0;
  812.  
  813. int current_node_global_offset_x = GetGlobalX(i),
  814. current_node_global_offset_y = GetGlobalY(j);
  815.  
  816. int glob_x = current_node_global_offset_x,
  817. glob_y = current_node_global_offset_y;
  818.  
  819. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  820. case 2:
  821. case 4:
  822. case 6:
  823. case 8:
  824. //angle
  825. rho = 0.25;
  826. break;
  827. case 1:
  828. case 3:
  829. case 5:
  830. case 7:
  831. //side
  832. rho = 0.5;
  833. break;
  834. case 0:
  835. //internal
  836. rho = 1.0;
  837. break;
  838. default:
  839. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  840. }
  841.  
  842.  
  843. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  844.  
  845. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  846. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  847. }
  848. }
  849.  
  850. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  851. MPI_COMM_WORLD);
  852.  
  853. //global_diff_norm = sqrt(global_diff_norm);
  854.  
  855.  
  856. return global_diff_norm;
  857. }
  858.  
  859. double GetCurrentNorm(){
  860. return current_norm;
  861. }
  862. };
  863.  
  864.  
  865.  
  866.  
  867. int main(int argc, char* argv[]){
  868.  
  869. const double x_left = -2, x_right = 3;
  870. const double y_bottom = -1, y_top = 4;
  871.  
  872. double time_start, time_stop;
  873. int N, Gx, Gy;
  874. int dim[2], period[2];
  875.  
  876. MPI_Comm comm;
  877.  
  878. //N - global grid size
  879. N = atoi(argv[1]);
  880.  
  881. //Gx
  882. Gx = dim[0] = atoi(argv[2]);
  883.  
  884. //Gy
  885. Gy = dim[1] = atoi(argv[3]);
  886.  
  887. period[0]=0;
  888. period[1]=0;
  889.  
  890.  
  891. MPI_Init(&argc, &argv);
  892.  
  893. time_start = MPI_Wtime();
  894.  
  895. int world_size;
  896.  
  897. int my_rank;
  898.  
  899. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  900. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  901.  
  902.  
  903. if(my_rank == 0){
  904. cout << "[INFO]: N = " << N << endl;
  905. cout << "[INFO]: Gx = " << dim[0] << endl;
  906. cout << "[INFO]: Gy = " << dim[1] << endl;
  907. }
  908.  
  909.  
  910.  
  911. if(argc <= 3){
  912. if(my_rank == 0)
  913. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy>" << endl;
  914.  
  915. MPI_Abort(MPI_COMM_WORLD, 1);
  916. }
  917.  
  918. if(Gx * Gy != world_size){
  919. if(my_rank == 0)
  920. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  921.  
  922. MPI_Abort(MPI_COMM_WORLD, 1);
  923. }
  924.  
  925. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  926.  
  927. /*if(my_rank == 0)
  928. cout << "[INFO]: Cart created"<<endl;
  929. */
  930.  
  931. MPI_Comm_rank(comm, &my_rank);
  932.  
  933. int my_coords[2];
  934.  
  935. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  936.  
  937.  
  938.  
  939.  
  940. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  941.  
  942.  
  943. int iteration_num = 0;
  944.  
  945. bool should_i_stop = false;
  946.  
  947. while ( should_i_stop != true ){
  948.  
  949. w_func.DoIteration(should_i_stop);
  950.  
  951. if ( (my_rank == 0) && (iteration_num % 10000 == 0) ){
  952. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  953. }
  954.  
  955. iteration_num++;
  956. }
  957.  
  958. //MPI_Barrier(MPI_COMM_WORLD);
  959.  
  960. //vector <double> local_elements;
  961.  
  962. double comparing = w_func.CompareWithExact();
  963.  
  964. if (my_rank == 0)
  965. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  966.  
  967.  
  968.  
  969. //myfile.close();
  970.  
  971. time_stop = MPI_Wtime();
  972. if( my_rank == 0 )
  973. cout << "Finished!" << endl
  974. << "Total iterations: " << iteration_num << endl
  975. << "Elapsed time: " << (time_stop - time_start) << endl
  976. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  977.  
  978. MPI_Finalize();
  979.  
  980. return 0;
  981. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement