Advertisement
manac68974

Untitled

Dec 22nd, 2019
249
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 33.85 KB | None | 0 0
  1. #include <mpi.h>
  2. #include <omp.h>
  3.  
  4. #include <iomanip>
  5. #include <iostream>
  6. #include <vector>
  7. #include <map>
  8. #include <cmath>
  9. #include <cstdlib>
  10. #include <algorithm>
  11. #include <fstream>
  12. #include<sstream>
  13.  
  14. using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27. ofstream my_file;
  28.  
  29. vector<double> internal_data;
  30. vector<double> old_internal_data;
  31. vector<double> external_data[4];
  32.  
  33. vector <vector <double> > coefs;
  34.  
  35. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  36.  
  37. if ((cur_block_global_coors_x > 0 ) && (cur_block_global_coors_x < Gx - 1) && (cur_block_global_coors_y > 0) && (cur_block_global_coors_y < Gy - 1))
  38. return 0;
  39.  
  40. //corners
  41. //left bottom corner
  42. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  43. return 2;
  44. }
  45.  
  46. //left top corner
  47. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  48. return 4;
  49. }
  50.  
  51. //right bottom corner
  52. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  53. return 6;
  54. }
  55.  
  56. //right top corner
  57. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  58. return 8;
  59. }
  60.  
  61. //sides
  62. //left side
  63. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  64. current_node_global_offset_x == 0){
  65. return 1;
  66. }
  67.  
  68. //right side
  69. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  70. current_node_global_offset_x == M){
  71. return 3;
  72. }
  73.  
  74. //bottom side
  75. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  76. current_node_global_offset_y == 0){
  77. return 5;
  78. }
  79.  
  80. //top side
  81. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  82. current_node_global_offset_y == N)){
  83. return 7;
  84. }
  85.  
  86. //internal
  87. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  88. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  89. return 0;
  90. }
  91.  
  92. return -1;
  93. }
  94.  
  95. double k(double x, double y) {
  96. return 1 + pow(x + y, 2);
  97. }
  98.  
  99. double q(double x, double y) {
  100. return 1;
  101. }
  102.  
  103. double u(double x, double y) {
  104. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  105. }
  106.  
  107. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  108. double psi_R(double y) {
  109. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  110. }
  111.  
  112. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  113. double psi_L(double y) {
  114. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  115. }
  116.  
  117. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  118. double psi_T(double x) {
  119. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  120. }
  121.  
  122. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  123. double psi_B(double x) {
  124. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  125. }
  126.  
  127. // right-part function of Poisson equation
  128. double F(double x, double y) {
  129. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  130. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  131. }
  132.  
  133. //inner_product(A[i], internal_data)
  134.  
  135. //inner_product(A[i], internal_data)
  136. void ComputeCoefsForNode (double &left_coeff, double &right_coeff, double &bottom_coeff, double &top_coeff, double &this_coeff, int i, int j){
  137.  
  138. int glob_x = GetGlobalX(i);
  139. int glob_y = GetGlobalY(j);
  140.  
  141.  
  142. map <string,bool> neighbours;
  143. neighbours["left"] = true;
  144. neighbours["right"] = true;
  145. neighbours["bottom"] = true;
  146. neighbours["top"] = true;
  147.  
  148. left_coeff = 1.0;
  149. right_coeff = 1.0;
  150. bottom_coeff = 1.0;
  151. top_coeff = 1.0;
  152. this_coeff = 1.0;
  153. if (i > 0 && i < cur_block_size_x - 1 && j > 0 && j < cur_block_size_y - 1)
  154. ;
  155. else{
  156. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  157. case 2:
  158. //left bottom corner
  159. neighbours["left"] = false;
  160. neighbours["bottom"] = false;
  161. break;
  162. case 4:
  163. //left top corner
  164. neighbours["left"] = false;
  165. neighbours["top"] = false;
  166. break;
  167. case 6:
  168. //right bottom corner
  169. neighbours["right"] = false;
  170. neighbours["bottom"] = false;
  171. break;
  172. case 8:
  173. //right top corner
  174. neighbours["right"] = false;
  175. neighbours["top"] = false;
  176. break;
  177. case 1:
  178. //left side
  179. neighbours["left"] = false;
  180. break;
  181. case 3:
  182. //right side
  183. neighbours["right"] = false;
  184. break;
  185. case 5:
  186. //bottom side
  187. neighbours["bottom"] = false;
  188. break;
  189. case 7:
  190. //top side
  191. neighbours["top"] = false;
  192. break;
  193. case 0:
  194. //internal
  195. break;
  196. default:
  197. cout << "[ERROR]: Bad global coords compute coef. Global:" << glob_x << " " << glob_y<<endl;
  198. }
  199.  
  200.  
  201. if (!neighbours["left"]){
  202. right_coeff = 2.0;
  203. left_coeff = 0.0;
  204. }
  205.  
  206. if (!neighbours["right"]){
  207. left_coeff = 2.0;
  208. right_coeff = 0.0;
  209. }
  210.  
  211. if (!neighbours["bottom"]){
  212. top_coeff = 2.0;
  213. bottom_coeff = 0.0;
  214. }
  215.  
  216. if (!neighbours["top"]){
  217. bottom_coeff = 2.0;
  218. top_coeff = 0.0;
  219. }
  220. }
  221.  
  222. if (neighbours["left"]){
  223. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  224. }
  225.  
  226. if (neighbours["right"]){
  227. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  228. }
  229.  
  230. if (neighbours["bottom"]){
  231. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  232. }
  233.  
  234. if (neighbours["top"]){
  235. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  236. }
  237.  
  238. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  239.  
  240. /*coefs[0] = left_coeff;
  241. coefs[1] = right_coeff;
  242. coefs[2] = bottom_coeff;
  243. coefs[3] = top_coeff;
  244. coefs[4] = this_coeff;*/
  245. }
  246.  
  247. double ComputeMagicInnerProductA_iw (int i, int j){
  248.  
  249. int glob_x = GetGlobalX(i);
  250. int glob_y = GetGlobalY(j);
  251.  
  252. double result = 0.0;
  253.  
  254. map <string,bool> neighbours;
  255. neighbours["left"] = true;
  256. neighbours["right"] = true;
  257. neighbours["bottom"] = true;
  258. neighbours["top"] = true;
  259.  
  260.  
  261. vector <double> my_coefs = coefs[j + cur_block_size_y*i];
  262.  
  263. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  264. double left_coeff = my_coefs[0], right_coeff = my_coefs[1], bottom_coeff = my_coefs[2], top_coeff = my_coefs[3], this_coeff = my_coefs[4];
  265.  
  266.  
  267. if (i > 0 && i < cur_block_size_x - 1 && j > 0 && j < cur_block_size_y - 1)
  268. ;
  269. else{
  270. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  271. case 2:
  272. //left bottom corner
  273. neighbours["left"] = false;
  274. neighbours["bottom"] = false;
  275. break;
  276. case 4:
  277. //left top corner
  278. neighbours["left"] = false;
  279. neighbours["top"] = false;
  280. break;
  281. case 6:
  282. //right bottom corner
  283. neighbours["right"] = false;
  284. neighbours["bottom"] = false;
  285. break;
  286. case 8:
  287. //right top corner
  288. neighbours["right"] = false;
  289. neighbours["top"] = false;
  290. break;
  291. case 1:
  292. //left side
  293. neighbours["left"] = false;
  294. break;
  295. case 3:
  296. //right side
  297. neighbours["right"] = false;
  298. break;
  299. case 5:
  300. //bottom side
  301. neighbours["bottom"] = false;
  302. break;
  303. case 7:
  304. //top side
  305. neighbours["top"] = false;
  306. break;
  307. case 0:
  308. //internal
  309. break;
  310. default:
  311. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  312. }
  313. }
  314.  
  315.  
  316.  
  317. if (neighbours["left"]){
  318. left_neighbour = Get(glob_x - 1, glob_y);
  319. }
  320.  
  321. if (neighbours["right"]){
  322. right_neighbour = Get(glob_x + 1, glob_y);
  323. }
  324.  
  325. if (neighbours["bottom"]){
  326. bottom_neighbour = Get(glob_x, glob_y - 1);
  327. }
  328.  
  329. if (neighbours["top"]){
  330. top_neighbour = Get(glob_x, glob_y + 1);
  331. }
  332.  
  333. this_node = Get(glob_x, glob_y);
  334.  
  335. result = left_coeff * left_neighbour +
  336. right_coeff * right_neighbour +
  337. bottom_coeff * bottom_neighbour +
  338. top_coeff * top_neighbour +
  339. this_coeff * this_node;
  340.  
  341. return result;
  342. }
  343.  
  344. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  345.  
  346. int glob_x = current_node_global_offset_x;
  347. int glob_y = current_node_global_offset_y;
  348.  
  349. double result = 0.0;
  350.  
  351. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  352. case 2:
  353. //left bottom corner
  354. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  355. break;
  356. case 4:
  357. //left top corner
  358. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  359. break;
  360. case 6:
  361. //right bottom corner
  362. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  363. break;
  364. case 8:
  365. //right top corner
  366. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  367. break;
  368. case 1:
  369. //left side
  370. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  371. break;
  372. case 3:
  373. //right side
  374. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  375. break;
  376. case 5:
  377. //bottom side
  378. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  379. break;
  380. case 7:
  381. //top side
  382. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  383. break;
  384. case 0:
  385. //internal
  386. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  387. break;
  388. default:
  389. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  390.  
  391. }
  392.  
  393. return result;
  394.  
  395. }
  396.  
  397. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  398.  
  399. int glob_x = current_node_global_offset_x;
  400. int glob_y = current_node_global_offset_y;
  401.  
  402. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  403.  
  404. }
  405.  
  406. void ComputeMatrixR(){
  407. /*if(my_rank == 0)
  408. cout << "[INFO]: Computation of matrix r started"<<endl;
  409. */
  410.  
  411. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  412.  
  413. #pragma omp parallel for
  414. for(int i = 0; i < cur_block_size_x; ++i){
  415. for(int j = 0; j < cur_block_size_y; ++j){
  416.  
  417. int current_node_global_offset_x = GetGlobalX(i),
  418. current_node_global_offset_y = GetGlobalY(j);
  419.  
  420. int glob_x = current_node_global_offset_x,
  421. glob_y = current_node_global_offset_y;
  422.  
  423. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(i,j) - GetNodeFromB(glob_x, glob_y);
  424.  
  425. }
  426. }
  427.  
  428. #pragma omp parallel for
  429. for(int i = 0; i < cur_block_size_x; ++i){
  430. for(int j = 0; j < cur_block_size_y; ++j){
  431.  
  432. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  433. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  434.  
  435. }
  436. }
  437.  
  438. //old_internal_data = internal_data;
  439. //internal_data = r_tmp_matrix;
  440.  
  441. SyncMPI();
  442.  
  443. }
  444.  
  445. double ComputeTauAndStopCase(bool &should_i_stop){
  446.  
  447. double local_Ar_r_inner_product_sum = 0.0;
  448. double local_Ar_Ar_inner_product_sum = 0.0;
  449. double global_Ar_r_inner_product_sum = 0.0;
  450. double global_Ar_Ar_inner_product_sum = 0.0;
  451. double local_r_norm = 0.0;
  452. double global_r_norm = 0.0;
  453.  
  454. vector <double> tmp_Ar_i_js(cur_block_size_x * cur_block_size_y, 0.0);
  455. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  456.  
  457. #pragma omp parallel for
  458. for(int i = 0; i < cur_block_size_x; ++i){
  459. for(int j = 0; j < cur_block_size_y; ++j){
  460.  
  461. int current_node_global_offset_x = GetGlobalX(i),
  462. current_node_global_offset_y = GetGlobalY(j);
  463.  
  464. int glob_x = current_node_global_offset_x,
  465. glob_y = current_node_global_offset_y;
  466.  
  467. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(i, j);
  468.  
  469. double tmp_elem = Get(glob_x, glob_y);
  470.  
  471. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  472.  
  473. tmp_Ar_i_js [j + cur_block_size_y*i] = tmp_Ar_i_j;
  474. }
  475. }
  476.  
  477.  
  478.  
  479.  
  480. for (int i = 0; i < cur_block_size_x; ++i){
  481. for (int j = 0; j < cur_block_size_y; ++j){
  482.  
  483. double rho = 1.0;
  484.  
  485. int current_node_global_offset_x = GetGlobalX(i),
  486. current_node_global_offset_y = GetGlobalY(j);
  487.  
  488. int glob_x = current_node_global_offset_x,
  489. glob_y = current_node_global_offset_y;
  490.  
  491.  
  492. if ((i > 0) && (i < cur_block_size_x - 1) && (j > 0) && (j < cur_block_size_y - 1))
  493. rho = 1.0;
  494. else{
  495. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  496. case 2:
  497. case 4:
  498. case 6:
  499. case 8:
  500. //angle
  501. rho = 0.25;
  502. break;
  503. case 1:
  504. case 3:
  505. case 5:
  506. case 7:
  507. //side
  508. rho = 0.5;
  509. break;
  510. case 0:
  511. //internal
  512. rho = 1.0;
  513. break;
  514. default:
  515. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  516. }
  517. }
  518.  
  519. double tmp_cur_node_value = tmp_elements [j + cur_block_size_y*i];
  520. double tmp_Ar_i_j = tmp_Ar_i_js [j + cur_block_size_y*i];
  521. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  522. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  523.  
  524.  
  525. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  526. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  527. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  528.  
  529. }
  530. }
  531.  
  532.  
  533.  
  534.  
  535. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  536.  
  537. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  538. comm);
  539.  
  540. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  541.  
  542. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  543. MPI_COMM_WORLD);
  544.  
  545. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  546.  
  547. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  548.  
  549. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  550. MPI_COMM_WORLD);
  551.  
  552. double eps = 1e-06;
  553.  
  554. if (global_r_norm < 0){
  555. cout << "[ERROR]: bad global r norm" << endl;
  556. }
  557.  
  558. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  559.  
  560. //if (my_rank == 0)
  561. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  562.  
  563. if (current_norm <= eps){
  564. if (my_rank == 0)
  565. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  566. should_i_stop = true;
  567. }else{
  568. should_i_stop = false;
  569. }
  570.  
  571. return global_tau;
  572.  
  573. }
  574.  
  575.  
  576. void ComputeNewW(double tau){
  577. #pragma omp parallel for
  578. for(int i = 0; i < cur_block_size_x; ++i){
  579. for(int j = 0; j < cur_block_size_y; ++j){
  580. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  581. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  582. }
  583. }
  584. }
  585.  
  586. int GetGlobalX(int i){
  587. return cur_block_global_offset_x + i;
  588. }
  589.  
  590. int GetGlobalY(int j){
  591. return cur_block_global_offset_y + j;
  592. }
  593.  
  594. public:
  595. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  596.  
  597. M = inpM;
  598. N = inpN;
  599.  
  600. Gx = inpGx;
  601. Gy = inpGy;
  602.  
  603. x_left = inpx_left;
  604. x_right = inpx_right;
  605. y_bottom = inpy_bottom;
  606. y_top = inpy_top;
  607.  
  608. h_x = double((x_right - x_left)) / M;
  609. h_y = double((y_top - y_bottom)) / N;
  610.  
  611. my_rank = inprank;
  612. comm = inpcomm;
  613.  
  614.  
  615. ofstream myfile;
  616. stringstream ss;
  617. ss<< "./res" << N << Gx*Gy << "_"<< << inp_cur_block_global_coors_x << "_" << inp_cur_block_global_coors_y << ".res";
  618.  
  619. string filename;
  620.  
  621. ss >> filename;
  622.  
  623. my_file.open (filename);
  624.  
  625. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  626. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  627.  
  628. global_block_size_x = (M + 1) / Gx;
  629. global_block_size_y = (N + 1) / Gy;
  630.  
  631. cur_block_size_x = global_block_size_x;
  632. cur_block_size_y = global_block_size_y;
  633.  
  634. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  635. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  636.  
  637. //if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  638. if(cur_block_global_coors_x == Gx - 1 ){
  639. cur_block_size_x += (M + 1) % Gx;
  640. }
  641.  
  642. //if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  643. if(cur_block_global_coors_y == Gy - 1){
  644. cur_block_size_y += (N + 1) % Gy;
  645. }
  646.  
  647. internal_data.resize(cur_block_size_x * cur_block_size_y);
  648. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  649.  
  650.  
  651.  
  652. vector <vector< double> > tmp_coefs(cur_block_size_x * cur_block_size_y, vector<double>(5, 0.0));
  653.  
  654. coefs = tmp_coefs;
  655.  
  656. for(int i = 0; i < cur_block_size_x; i++){
  657. for (int j = 0; j < cur_block_size_y; j++){
  658. vector <double> &my_coefs = coefs[j + cur_block_size_y*i];
  659. ComputeCoefsForNode(my_coefs[0], my_coefs[1], my_coefs[2], my_coefs[3], my_coefs[4], i, j);
  660. }
  661. }
  662.  
  663. //OX
  664. external_data[0].resize(cur_block_size_y);
  665. external_data[1].resize(cur_block_size_y);
  666.  
  667. //OY
  668. external_data[2].resize(cur_block_size_x);
  669. external_data[3].resize(cur_block_size_x);
  670.  
  671. }
  672.  
  673. double Get(int i, int j) {
  674. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  675. }
  676.  
  677. void Set(int i, int j, double v) {
  678. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  679. }
  680.  
  681. void SyncMPI(){
  682.  
  683. /*if(my_rank == 0)
  684. cout << "[INFO]: Sync started"<< endl;
  685. */
  686.  
  687. //MPI_Barrier(MPI_COMM_WORLD);
  688.  
  689. //left and right sides
  690. #pragma omp parallel for
  691. for(int j = 0; j < cur_block_size_y; ++j){
  692.  
  693. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  694. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  695.  
  696. }
  697.  
  698. //bottom and top sides
  699. #pragma omp parallel for
  700. for(int i = 0; i < cur_block_size_x; ++i){
  701.  
  702. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  703. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  704.  
  705. }
  706.  
  707. int my_coords[2];
  708. int targets_ranks[4];
  709.  
  710. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  711.  
  712. int neighbour_offsets[ 4 ][ 2 ] = {
  713. { -1, 0 },{ 1, 0 },
  714. { 0, -1 },{ 0, 1 }
  715. };
  716.  
  717. #pragma omp parallel for
  718. for(int i = 0; i < 4; i++){
  719.  
  720. int target_coords[2];
  721.  
  722. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  723. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  724.  
  725. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  726.  
  727. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  728.  
  729. }
  730. else{
  731. targets_ranks[i] = -1;
  732. }
  733. }
  734.  
  735. //Now we have rank for all targets
  736.  
  737. for(int axis = 0; axis < 2; axis++){
  738.  
  739. int parity_bit = (my_coords[ axis ]) % 2;
  740.  
  741. //if parity_bit == 0 then
  742. // zuerst mit links, dann mit rechts tauschen
  743. //elif parity_bit == 1:
  744. // zuerst mit rechts, dann mit links tauschen
  745.  
  746. for(int tmp = 0; tmp < 2; tmp++){
  747. parity_bit = 1 - parity_bit;
  748.  
  749. //target id in external_data und targets_ranks
  750. int target_idx = 2 * axis + (1 - parity_bit);
  751.  
  752. if (targets_ranks[target_idx] != -1){
  753.  
  754. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  755. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  756.  
  757. MPI_Status tmp_status;
  758.  
  759. if(my_rank != targets_ranks[ target_idx ]){
  760.  
  761. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  762. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  763. comm, &tmp_status);
  764.  
  765. }
  766. }
  767. }
  768. }
  769.  
  770. //MPI_Barrier(MPI_COMM_WORLD);
  771.  
  772. }
  773.  
  774. void DoIteration(bool &should_i_stop){
  775.  
  776. ComputeMatrixR();
  777.  
  778. //Now R Matrix is in internal_data
  779.  
  780. double tau = ComputeTauAndStopCase(should_i_stop);
  781.  
  782. //We have in our block jetzt:
  783. //in internal_data: R Matrix
  784. //in old_internal_data: w aus letzte iteration
  785. //and we have tau
  786. //jetzt koennen wir naechste w finden
  787.  
  788. ComputeNewW(tau);
  789.  
  790. SyncMPI();
  791.  
  792. }
  793.  
  794. double GetLocalIndex(int i, int j){
  795. //internal data
  796. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  797. return internal_data[ j + cur_block_size_y*i ];
  798. }
  799.  
  800. //external data
  801. //OX
  802. if((j >= 0) && (j < cur_block_size_y)){
  803.  
  804. if (i == -1)
  805. return external_data[ 0 ][ j ];
  806.  
  807. if (i == cur_block_size_x)
  808. return external_data[ 1 ][ j ];
  809.  
  810. }
  811.  
  812. //OY
  813. if((i >= 0) && (i < cur_block_size_x)){
  814.  
  815. if (j == -1)
  816. return external_data[ 2 ][ i ];
  817. if (j == cur_block_size_y)
  818. return external_data[ 3 ][ i ];
  819.  
  820. }
  821.  
  822. cout << "[ERROR]: bad local index" << endl;
  823.  
  824. return nan("");
  825. }
  826.  
  827. void SetLocalIndex(int i, int j, double v){
  828. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  829. internal_data[ j + cur_block_size_y*i ] = v;
  830. }else{
  831. cout << "[ERROR]: trying to set data outside the local area" << endl;
  832. }
  833. }
  834.  
  835. double CompareWithExact() {
  836.  
  837. double local_diff_norm = 0.0;
  838. double global_diff_norm = 0.0;
  839. /*
  840. if (my_rank == 0)
  841. cout << "[INFO]: Starting computing compare with exact" << endl;
  842. */
  843. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  844.  
  845. #pragma omp parallel for
  846. for (int i = 0; i < cur_block_size_x; ++i){
  847. for (int j = 0; j < cur_block_size_y; ++j){
  848.  
  849.  
  850. int current_node_global_offset_x = GetGlobalX(i),
  851. current_node_global_offset_y = GetGlobalY(j);
  852.  
  853. int glob_x = current_node_global_offset_x,
  854. glob_y = current_node_global_offset_y;
  855.  
  856. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  857.  
  858. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  859.  
  860. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  861.  
  862. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  863.  
  864. }
  865. }
  866.  
  867. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  868.  
  869.  
  870. for (int i = 0; i < cur_block_size_x; ++i){
  871. for (int j = 0; j < cur_block_size_y; ++j){
  872.  
  873. double rho = 1.0;
  874.  
  875. int current_node_global_offset_x = GetGlobalX(i),
  876. current_node_global_offset_y = GetGlobalY(j);
  877.  
  878. int glob_x = current_node_global_offset_x,
  879. glob_y = current_node_global_offset_y;
  880.  
  881.  
  882. if ((i > 0) && (i < cur_block_size_x - 1) && (j > 0) && (j < cur_block_size_y - 1))
  883. rho = 1.0;
  884. else{
  885. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  886. case 2:
  887. case 4:
  888. case 6:
  889. case 8:
  890. //angle
  891. rho = 0.25;
  892. break;
  893. case 1:
  894. case 3:
  895. case 5:
  896. case 7:
  897. //side
  898. rho = 0.5;
  899. break;
  900. case 0:
  901. //internal
  902. rho = 1.0;
  903. break;
  904. default:
  905. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  906. }
  907. }
  908.  
  909. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  910.  
  911. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  912. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  913. }
  914. }
  915.  
  916. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  917. MPI_COMM_WORLD);
  918.  
  919. //global_diff_norm = sqrt(global_diff_norm);
  920.  
  921.  
  922.  
  923. for (int i = 0; i < cur_block_size_x; ++i){
  924. for (int j = 0; j < cur_block_size_y; ++j){
  925.  
  926. int current_node_global_offset_x = GetGlobalX(i),
  927. current_node_global_offset_y = GetGlobalY(j);
  928.  
  929. int glob_x = current_node_global_offset_x,
  930. glob_y = current_node_global_offset_y;
  931.  
  932. my_file << glob_x << "," << glob_y << "," << Get(glob_x, glob_y) << "," << GetNodeFromExact(glob_x, glob_y) << endl;
  933. }
  934. }
  935.  
  936.  
  937. return global_diff_norm;
  938. }
  939.  
  940. double GetCurrentNorm(){
  941. return current_norm;
  942. }
  943. };
  944.  
  945.  
  946.  
  947.  
  948. int main(int argc, char* argv[]){
  949.  
  950.  
  951. const double x_left = -2, x_right = 3;
  952. const double y_bottom = -1, y_top = 4;
  953.  
  954. double time_start, time_stop;
  955. int N, Gx, Gy;
  956. int dim[2], period[2];
  957.  
  958. MPI_Comm comm;
  959.  
  960. //N - global grid size
  961. N = atoi(argv[1]);
  962.  
  963. //Gx
  964. Gx = dim[0] = atoi(argv[2]);
  965.  
  966. //Gy
  967. Gy = dim[1] = atoi(argv[3]);
  968.  
  969.  
  970. stringstream ss;
  971. ss<< "./logs/out" << N << Gx*Gy << ".res";
  972.  
  973. string filename;
  974.  
  975. ss >> filename;
  976.  
  977. ofstream out(filename.c_str());
  978. streambuf *coutbuf = cout.rdbuf(); //save old buf
  979. cout.rdbuf(out.rdbuf());
  980.  
  981.  
  982. stringstream sserr;
  983. sserr<< "./logs/out" << N << Gx*Gy << ".err";
  984.  
  985. string filenameerr;
  986.  
  987. sserr >> filenameerr;
  988.  
  989. ofstream err(filenameerr.c_str());
  990. streambuf *cerrbuf = cerr.rdbuf(); //save old buf
  991. cerr.rdbuf(err.rdbuf());
  992.  
  993. period[0]=0;
  994. period[1]=0;
  995.  
  996.  
  997. MPI_Init(&argc, &argv);
  998.  
  999. time_start = MPI_Wtime();
  1000.  
  1001. int world_size;
  1002.  
  1003. int my_rank;
  1004.  
  1005. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  1006. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  1007.  
  1008.  
  1009. if(my_rank == 0){
  1010. cout << "[INFO]: N = " << N << endl;
  1011. cout << "[INFO]: Gx = " << dim[0] << endl;
  1012. cout << "[INFO]: Gy = " << dim[1] << endl;
  1013. }
  1014.  
  1015.  
  1016.  
  1017. if(argc <= 3){
  1018. if(my_rank == 0)
  1019. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy>" << endl;
  1020.  
  1021. MPI_Abort(MPI_COMM_WORLD, 1);
  1022. }
  1023.  
  1024. if(Gx * Gy != world_size){
  1025. if(my_rank == 0)
  1026. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  1027.  
  1028. MPI_Abort(MPI_COMM_WORLD, 1);
  1029. }
  1030.  
  1031. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  1032.  
  1033. /*if(my_rank == 0)
  1034. cout << "[INFO]: Cart created"<<endl;
  1035. */
  1036.  
  1037. MPI_Comm_rank(comm, &my_rank);
  1038.  
  1039. int my_coords[2];
  1040.  
  1041. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  1042.  
  1043.  
  1044.  
  1045.  
  1046. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  1047.  
  1048.  
  1049. int iteration_num = 0;
  1050.  
  1051. bool should_i_stop = false;
  1052.  
  1053. while ( should_i_stop != true ){
  1054.  
  1055. w_func.DoIteration(should_i_stop);
  1056.  
  1057. if ( (my_rank == 0) && (iteration_num % 10000 == 0)){
  1058. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  1059. }
  1060.  
  1061. iteration_num++;
  1062. }
  1063.  
  1064. //MPI_Barrier(MPI_COMM_WORLD);
  1065.  
  1066. //vector <double> local_elements;
  1067.  
  1068. double comparing = w_func.CompareWithExact();
  1069.  
  1070. if (my_rank == 0)
  1071. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  1072.  
  1073.  
  1074.  
  1075. //myfile.close();
  1076.  
  1077. time_stop = MPI_Wtime();
  1078. if( my_rank == 0 )
  1079. cout << "Finished!" << endl
  1080. << "Total iterations: " << iteration_num << endl
  1081. << "Elapsed time: " << (time_stop - time_start) << endl
  1082. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  1083.  
  1084. MPI_Finalize();
  1085.  
  1086. cout.rdbuf(coutbuf); //reset to standard input again
  1087. cerr.rdbuf(cerrbuf); //reset to standard output again
  1088.  
  1089.  
  1090. return 0;
  1091. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement