Advertisement
manac68974

Untitled

Dec 19th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 33.04 KB | None | 0 0
  1. #include <mpi.h>
  2. #include <omp.h>
  3.  
  4. #include <iomanip>
  5. #include <iostream>
  6. #include <vector>
  7. #include <map>
  8. #include <cmath>
  9. #include <cstdlib>
  10. #include <algorithm>
  11. #include <fstream>
  12. #include<sstream>
  13.  
  14. using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27.  
  28. vector<double> internal_data;
  29. vector<double> old_internal_data;
  30. vector<double> external_data[4];
  31.  
  32. vector <vector <double> > coefs;
  33.  
  34. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  35.  
  36. if ((cur_block_global_coors_x > 0 ) && (cur_block_global_coors_x < Gx - 1) && (cur_block_global_coors_y > 0) && (cur_block_global_coors_y < Gy - 1))
  37. return 0;
  38.  
  39. //corners
  40. //left bottom corner
  41. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  42. return 2;
  43. }
  44.  
  45. //left top corner
  46. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  47. return 4;
  48. }
  49.  
  50. //right bottom corner
  51. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  52. return 6;
  53. }
  54.  
  55. //right top corner
  56. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  57. return 8;
  58. }
  59.  
  60. //sides
  61. //left side
  62. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  63. current_node_global_offset_x == 0){
  64. return 1;
  65. }
  66.  
  67. //right side
  68. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  69. current_node_global_offset_x == M){
  70. return 3;
  71. }
  72.  
  73. //bottom side
  74. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  75. current_node_global_offset_y == 0){
  76. return 5;
  77. }
  78.  
  79. //top side
  80. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  81. current_node_global_offset_y == N)){
  82. return 7;
  83. }
  84.  
  85. //internal
  86. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  87. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  88. return 0;
  89. }
  90.  
  91. return -1;
  92. }
  93.  
  94. double k(double x, double y) {
  95. return 1 + pow(x + y, 2);
  96. }
  97.  
  98. double q(double x, double y) {
  99. return 1;
  100. }
  101.  
  102. double u(double x, double y) {
  103. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  104. }
  105.  
  106. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  107. double psi_R(double y) {
  108. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  109. }
  110.  
  111. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  112. double psi_L(double y) {
  113. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  114. }
  115.  
  116. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  117. double psi_T(double x) {
  118. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  119. }
  120.  
  121. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  122. double psi_B(double x) {
  123. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  124. }
  125.  
  126. // right-part function of Poisson equation
  127. double F(double x, double y) {
  128. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  129. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  130. }
  131.  
  132. //inner_product(A[i], internal_data)
  133.  
  134. //inner_product(A[i], internal_data)
  135. void ComputeCoefsForNode (double &left_coeff, double &right_coeff, double &bottom_coeff, double &top_coeff, double &this_coeff, int i, int j){
  136.  
  137. int glob_x = GetGlobalX(i);
  138. int glob_y = GetGlobalY(j);
  139.  
  140.  
  141. map <string,bool> neighbours;
  142. neighbours["left"] = true;
  143. neighbours["right"] = true;
  144. neighbours["bottom"] = true;
  145. neighbours["top"] = true;
  146.  
  147. left_coeff = 1.0;
  148. right_coeff = 1.0;
  149. bottom_coeff = 1.0;
  150. top_coeff = 1.0;
  151. this_coeff = 1.0;
  152. if (i > 0 && i < cur_block_size_x - 1 && j > 0 && j < cur_block_size_y - 1)
  153. ;
  154. else{
  155. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  156. case 2:
  157. //left bottom corner
  158. neighbours["left"] = false;
  159. neighbours["bottom"] = false;
  160. break;
  161. case 4:
  162. //left top corner
  163. neighbours["left"] = false;
  164. neighbours["top"] = false;
  165. break;
  166. case 6:
  167. //right bottom corner
  168. neighbours["right"] = false;
  169. neighbours["bottom"] = false;
  170. break;
  171. case 8:
  172. //right top corner
  173. neighbours["right"] = false;
  174. neighbours["top"] = false;
  175. break;
  176. case 1:
  177. //left side
  178. neighbours["left"] = false;
  179. break;
  180. case 3:
  181. //right side
  182. neighbours["right"] = false;
  183. break;
  184. case 5:
  185. //bottom side
  186. neighbours["bottom"] = false;
  187. break;
  188. case 7:
  189. //top side
  190. neighbours["top"] = false;
  191. break;
  192. case 0:
  193. //internal
  194. break;
  195. default:
  196. cout << "[ERROR]: Bad global coords compute coef. Global:" << glob_x << " " << glob_y<<endl;
  197. }
  198.  
  199.  
  200. if (!neighbours["left"]){
  201. right_coeff = 2.0;
  202. left_coeff = 0.0;
  203. }
  204.  
  205. if (!neighbours["right"]){
  206. left_coeff = 2.0;
  207. right_coeff = 0.0;
  208. }
  209.  
  210. if (!neighbours["bottom"]){
  211. top_coeff = 2.0;
  212. bottom_coeff = 0.0;
  213. }
  214.  
  215. if (!neighbours["top"]){
  216. bottom_coeff = 2.0;
  217. top_coeff = 0.0;
  218. }
  219. }
  220.  
  221. if (neighbours["left"]){
  222. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  223. }
  224.  
  225. if (neighbours["right"]){
  226. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  227. }
  228.  
  229. if (neighbours["bottom"]){
  230. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  231. }
  232.  
  233. if (neighbours["top"]){
  234. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  235. }
  236.  
  237. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  238.  
  239. /*coefs[0] = left_coeff;
  240. coefs[1] = right_coeff;
  241. coefs[2] = bottom_coeff;
  242. coefs[3] = top_coeff;
  243. coefs[4] = this_coeff;*/
  244. }
  245.  
  246. double ComputeMagicInnerProductA_iw (int i, int j){
  247.  
  248. int glob_x = GetGlobalX(i);
  249. int glob_y = GetGlobalY(j);
  250.  
  251. double result = 0.0;
  252.  
  253. map <string,bool> neighbours;
  254. neighbours["left"] = true;
  255. neighbours["right"] = true;
  256. neighbours["bottom"] = true;
  257. neighbours["top"] = true;
  258.  
  259.  
  260. vector <double> my_coefs = coefs[j + cur_block_size_y*i];
  261.  
  262. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  263. double left_coeff = my_coefs[0], right_coeff = my_coefs[1], bottom_coeff = my_coefs[2], top_coeff = my_coefs[3], this_coeff = my_coefs[4];
  264.  
  265.  
  266. if (i > 0 && i < cur_block_size_x - 1 && j > 0 && j < cur_block_size_y - 1)
  267. ;
  268. else{
  269. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  270. case 2:
  271. //left bottom corner
  272. neighbours["left"] = false;
  273. neighbours["bottom"] = false;
  274. break;
  275. case 4:
  276. //left top corner
  277. neighbours["left"] = false;
  278. neighbours["top"] = false;
  279. break;
  280. case 6:
  281. //right bottom corner
  282. neighbours["right"] = false;
  283. neighbours["bottom"] = false;
  284. break;
  285. case 8:
  286. //right top corner
  287. neighbours["right"] = false;
  288. neighbours["top"] = false;
  289. break;
  290. case 1:
  291. //left side
  292. neighbours["left"] = false;
  293. break;
  294. case 3:
  295. //right side
  296. neighbours["right"] = false;
  297. break;
  298. case 5:
  299. //bottom side
  300. neighbours["bottom"] = false;
  301. break;
  302. case 7:
  303. //top side
  304. neighbours["top"] = false;
  305. break;
  306. case 0:
  307. //internal
  308. break;
  309. default:
  310. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  311. }
  312. }
  313.  
  314.  
  315.  
  316. if (neighbours["left"]){
  317. left_neighbour = Get(glob_x - 1, glob_y);
  318. }
  319.  
  320. if (neighbours["right"]){
  321. right_neighbour = Get(glob_x + 1, glob_y);
  322. }
  323.  
  324. if (neighbours["bottom"]){
  325. bottom_neighbour = Get(glob_x, glob_y - 1);
  326. }
  327.  
  328. if (neighbours["top"]){
  329. top_neighbour = Get(glob_x, glob_y + 1);
  330. }
  331.  
  332. this_node = Get(glob_x, glob_y);
  333.  
  334. result = left_coeff * left_neighbour +
  335. right_coeff * right_neighbour +
  336. bottom_coeff * bottom_neighbour +
  337. top_coeff * top_neighbour +
  338. this_coeff * this_node;
  339.  
  340. return result;
  341. }
  342.  
  343. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  344.  
  345. int glob_x = current_node_global_offset_x;
  346. int glob_y = current_node_global_offset_y;
  347.  
  348. double result = 0.0;
  349.  
  350. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  351. case 2:
  352. //left bottom corner
  353. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  354. break;
  355. case 4:
  356. //left top corner
  357. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  358. break;
  359. case 6:
  360. //right bottom corner
  361. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  362. break;
  363. case 8:
  364. //right top corner
  365. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  366. break;
  367. case 1:
  368. //left side
  369. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  370. break;
  371. case 3:
  372. //right side
  373. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  374. break;
  375. case 5:
  376. //bottom side
  377. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  378. break;
  379. case 7:
  380. //top side
  381. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  382. break;
  383. case 0:
  384. //internal
  385. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  386. break;
  387. default:
  388. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  389.  
  390. }
  391.  
  392. return result;
  393.  
  394. }
  395.  
  396. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  397.  
  398. int glob_x = current_node_global_offset_x;
  399. int glob_y = current_node_global_offset_y;
  400.  
  401. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  402.  
  403. }
  404.  
  405. void ComputeMatrixR(){
  406. /*if(my_rank == 0)
  407. cout << "[INFO]: Computation of matrix r started"<<endl;
  408. */
  409.  
  410. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  411.  
  412. //#pragma omp parallel for
  413. for(int i = 0; i < cur_block_size_x; ++i){
  414. for(int j = 0; j < cur_block_size_y; ++j){
  415.  
  416. int current_node_global_offset_x = GetGlobalX(i),
  417. current_node_global_offset_y = GetGlobalY(j);
  418.  
  419. int glob_x = current_node_global_offset_x,
  420. glob_y = current_node_global_offset_y;
  421.  
  422. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(i,j) - GetNodeFromB(glob_x, glob_y);
  423.  
  424. }
  425. }
  426.  
  427. //#pragma omp parallel for
  428. for(int i = 0; i < cur_block_size_x; ++i){
  429. for(int j = 0; j < cur_block_size_y; ++j){
  430.  
  431. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  432. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  433.  
  434. }
  435. }
  436.  
  437. //old_internal_data = internal_data;
  438. //internal_data = r_tmp_matrix;
  439.  
  440. SyncMPI();
  441.  
  442. }
  443.  
  444. double ComputeTauAndStopCase(bool &should_i_stop){
  445.  
  446. double local_Ar_r_inner_product_sum = 0.0;
  447. double local_Ar_Ar_inner_product_sum = 0.0;
  448. double global_Ar_r_inner_product_sum = 0.0;
  449. double global_Ar_Ar_inner_product_sum = 0.0;
  450. double local_r_norm = 0.0;
  451. double global_r_norm = 0.0;
  452.  
  453. vector <double> tmp_Ar_i_js(cur_block_size_x * cur_block_size_y, 0.0);
  454. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  455.  
  456. //#pragma omp parallel for
  457. for(int i = 0; i < cur_block_size_x; ++i){
  458. for(int j = 0; j < cur_block_size_y; ++j){
  459.  
  460. int current_node_global_offset_x = GetGlobalX(i),
  461. current_node_global_offset_y = GetGlobalY(j);
  462.  
  463. int glob_x = current_node_global_offset_x,
  464. glob_y = current_node_global_offset_y;
  465.  
  466. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(i, j);
  467.  
  468. double tmp_elem = Get(glob_x, glob_y);
  469.  
  470. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  471.  
  472. tmp_Ar_i_js [j + cur_block_size_y*i] = tmp_Ar_i_j;
  473. }
  474. }
  475.  
  476.  
  477.  
  478.  
  479. for (int i = 0; i < cur_block_size_x; ++i){
  480. for (int j = 0; j < cur_block_size_y; ++j){
  481.  
  482. double rho = 1.0;
  483.  
  484. int current_node_global_offset_x = GetGlobalX(i),
  485. current_node_global_offset_y = GetGlobalY(j);
  486.  
  487. int glob_x = current_node_global_offset_x,
  488. glob_y = current_node_global_offset_y;
  489.  
  490.  
  491. if ((i > 0) && (i < cur_block_size_x - 1) && (j > 0) && (j < cur_block_size_y - 1))
  492. rho = 1.0;
  493. else{
  494. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  495. case 2:
  496. case 4:
  497. case 6:
  498. case 8:
  499. //angle
  500. rho = 0.25;
  501. break;
  502. case 1:
  503. case 3:
  504. case 5:
  505. case 7:
  506. //side
  507. rho = 0.5;
  508. break;
  509. case 0:
  510. //internal
  511. rho = 1.0;
  512. break;
  513. default:
  514. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  515. }
  516. }
  517.  
  518. double tmp_cur_node_value = tmp_elements [j + cur_block_size_y*i];
  519. double tmp_Ar_i_j = tmp_Ar_i_js [j + cur_block_size_y*i];
  520. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  521. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  522.  
  523.  
  524. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  525. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  526. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  527.  
  528. }
  529. }
  530.  
  531.  
  532.  
  533.  
  534. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  535.  
  536. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  537. comm);
  538.  
  539. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  540.  
  541. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  542. MPI_COMM_WORLD);
  543.  
  544. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  545.  
  546. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  547.  
  548. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  549. MPI_COMM_WORLD);
  550.  
  551. double eps = 1e-06;
  552.  
  553. if (global_r_norm < 0){
  554. cout << "[ERROR]: bad global r norm" << endl;
  555. }
  556.  
  557. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  558.  
  559. //if (my_rank == 0)
  560. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  561.  
  562. if (current_norm <= eps){
  563. if (my_rank == 0)
  564. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  565. should_i_stop = true;
  566. }else{
  567. should_i_stop = false;
  568. }
  569.  
  570. return global_tau;
  571.  
  572. }
  573.  
  574.  
  575. void ComputeNewW(double tau){
  576. //#pragma omp parallel for
  577. for(int i = 0; i < cur_block_size_x; ++i){
  578. for(int j = 0; j < cur_block_size_y; ++j){
  579. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  580. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  581. }
  582. }
  583. }
  584.  
  585. int GetGlobalX(int i){
  586. return cur_block_global_offset_x + i;
  587. }
  588.  
  589. int GetGlobalY(int j){
  590. return cur_block_global_offset_y + j;
  591. }
  592.  
  593. public:
  594. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  595.  
  596. M = inpM;
  597. N = inpN;
  598.  
  599. Gx = inpGx;
  600. Gy = inpGy;
  601.  
  602. x_left = inpx_left;
  603. x_right = inpx_right;
  604. y_bottom = inpy_bottom;
  605. y_top = inpy_top;
  606.  
  607. h_x = double((x_right - x_left)) / M;
  608. h_y = double((y_top - y_bottom)) / N;
  609.  
  610. my_rank = inprank;
  611. comm = inpcomm;
  612.  
  613. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  614. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  615.  
  616. global_block_size_x = (M + 1) / Gx;
  617. global_block_size_y = (N + 1) / Gy;
  618.  
  619. cur_block_size_x = global_block_size_x;
  620. cur_block_size_y = global_block_size_y;
  621.  
  622. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  623. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  624.  
  625. //if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  626. if(cur_block_global_coors_x == Gx - 1 ){
  627. cur_block_size_x += (M + 1) % Gx;
  628. }
  629.  
  630. //if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  631. if(cur_block_global_coors_y == Gy - 1){
  632. cur_block_size_y += (N + 1) % Gy;
  633. }
  634.  
  635. internal_data.resize(cur_block_size_x * cur_block_size_y);
  636. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  637.  
  638.  
  639.  
  640. vector <vector< double> > tmp_coefs(cur_block_size_x * cur_block_size_y, vector<double>(5, 0.0));
  641.  
  642. coefs = tmp_coefs;
  643.  
  644. for(int i = 0; i < cur_block_size_x; i++){
  645. for (int j = 0; j < cur_block_size_y; j++){
  646. vector <double> &my_coefs = coefs[j + cur_block_size_y*i];
  647. ComputeCoefsForNode(my_coefs[0], my_coefs[1], my_coefs[2], my_coefs[3], my_coefs[4], i, j);
  648. }
  649. }
  650.  
  651. //OX
  652. external_data[0].resize(cur_block_size_y);
  653. external_data[1].resize(cur_block_size_y);
  654.  
  655. //OY
  656. external_data[2].resize(cur_block_size_x);
  657. external_data[3].resize(cur_block_size_x);
  658.  
  659. }
  660.  
  661. double Get(int i, int j) {
  662. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  663. }
  664.  
  665. void Set(int i, int j, double v) {
  666. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  667. }
  668.  
  669. void SyncMPI(){
  670.  
  671. /*if(my_rank == 0)
  672. cout << "[INFO]: Sync started"<< endl;
  673. */
  674.  
  675. //MPI_Barrier(MPI_COMM_WORLD);
  676.  
  677. //left and right sides
  678. //#pragma omp parallel for
  679. for(int j = 0; j < cur_block_size_y; ++j){
  680.  
  681. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  682. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  683.  
  684. }
  685.  
  686. //bottom and top sides
  687. //#pragma omp parallel for
  688. for(int i = 0; i < cur_block_size_x; ++i){
  689.  
  690. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  691. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  692.  
  693. }
  694.  
  695. int my_coords[2];
  696. int targets_ranks[4];
  697.  
  698. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  699.  
  700. int neighbour_offsets[ 4 ][ 2 ] = {
  701. { -1, 0 },{ 1, 0 },
  702. { 0, -1 },{ 0, 1 }
  703. };
  704.  
  705. //#pragma omp parallel for
  706. for(int i = 0; i < 4; i++){
  707.  
  708. int target_coords[2];
  709.  
  710. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  711. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  712.  
  713. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  714.  
  715. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  716.  
  717. }
  718. else{
  719. targets_ranks[i] = -1;
  720. }
  721. }
  722.  
  723. //Now we have rank for all targets
  724.  
  725. for(int axis = 0; axis < 2; axis++){
  726.  
  727. int parity_bit = (my_coords[ axis ]) % 2;
  728.  
  729. //if parity_bit == 0 then
  730. // zuerst mit links, dann mit rechts tauschen
  731. //elif parity_bit == 1:
  732. // zuerst mit rechts, dann mit links tauschen
  733.  
  734. for(int tmp = 0; tmp < 2; tmp++){
  735. parity_bit = 1 - parity_bit;
  736.  
  737. //target id in external_data und targets_ranks
  738. int target_idx = 2 * axis + (1 - parity_bit);
  739.  
  740. if (targets_ranks[target_idx] != -1){
  741.  
  742. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  743. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  744.  
  745. MPI_Status tmp_status;
  746.  
  747. if(my_rank != targets_ranks[ target_idx ]){
  748.  
  749. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  750. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  751. comm, &tmp_status);
  752.  
  753. }
  754. }
  755. }
  756. }
  757.  
  758. //MPI_Barrier(MPI_COMM_WORLD);
  759.  
  760. }
  761.  
  762. void DoIteration(bool &should_i_stop){
  763.  
  764. ComputeMatrixR();
  765.  
  766. //Now R Matrix is in internal_data
  767.  
  768. double tau = ComputeTauAndStopCase(should_i_stop);
  769.  
  770. //We have in our block jetzt:
  771. //in internal_data: R Matrix
  772. //in old_internal_data: w aus letzte iteration
  773. //and we have tau
  774. //jetzt koennen wir naechste w finden
  775.  
  776. ComputeNewW(tau);
  777.  
  778. SyncMPI();
  779.  
  780. }
  781.  
  782. double GetLocalIndex(int i, int j){
  783. //internal data
  784. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  785. return internal_data[ j + cur_block_size_y*i ];
  786. }
  787.  
  788. //external data
  789. //OX
  790. if((j >= 0) && (j < cur_block_size_y)){
  791.  
  792. if (i == -1)
  793. return external_data[ 0 ][ j ];
  794.  
  795. if (i == cur_block_size_x)
  796. return external_data[ 1 ][ j ];
  797.  
  798. }
  799.  
  800. //OY
  801. if((i >= 0) && (i < cur_block_size_x)){
  802.  
  803. if (j == -1)
  804. return external_data[ 2 ][ i ];
  805. if (j == cur_block_size_y)
  806. return external_data[ 3 ][ i ];
  807.  
  808. }
  809.  
  810. cout << "[ERROR]: bad local index" << endl;
  811.  
  812. return nan("");
  813. }
  814.  
  815. void SetLocalIndex(int i, int j, double v){
  816. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  817. internal_data[ j + cur_block_size_y*i ] = v;
  818. }else{
  819. cout << "[ERROR]: trying to set data outside the local area" << endl;
  820. }
  821. }
  822.  
  823. double CompareWithExact() {
  824.  
  825. double local_diff_norm = 0.0;
  826. double global_diff_norm = 0.0;
  827. /*
  828. if (my_rank == 0)
  829. cout << "[INFO]: Starting computing compare with exact" << endl;
  830. */
  831. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  832.  
  833. //#pragma omp parallel for
  834. for (int i = 0; i < cur_block_size_x; ++i){
  835. for (int j = 0; j < cur_block_size_y; ++j){
  836.  
  837.  
  838. int current_node_global_offset_x = GetGlobalX(i),
  839. current_node_global_offset_y = GetGlobalY(j);
  840.  
  841. int glob_x = current_node_global_offset_x,
  842. glob_y = current_node_global_offset_y;
  843.  
  844. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  845.  
  846. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  847.  
  848. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  849.  
  850. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  851.  
  852. }
  853. }
  854.  
  855. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  856.  
  857.  
  858. for (int i = 0; i < cur_block_size_x; ++i){
  859. for (int j = 0; j < cur_block_size_y; ++j){
  860.  
  861. double rho = 1.0;
  862.  
  863. int current_node_global_offset_x = GetGlobalX(i),
  864. current_node_global_offset_y = GetGlobalY(j);
  865.  
  866. int glob_x = current_node_global_offset_x,
  867. glob_y = current_node_global_offset_y;
  868.  
  869.  
  870. if ((i > 0) && (i < cur_block_size_x - 1) && (j > 0) && (j < cur_block_size_y - 1))
  871. rho = 1.0;
  872. else{
  873. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  874. case 2:
  875. case 4:
  876. case 6:
  877. case 8:
  878. //angle
  879. rho = 0.25;
  880. break;
  881. case 1:
  882. case 3:
  883. case 5:
  884. case 7:
  885. //side
  886. rho = 0.5;
  887. break;
  888. case 0:
  889. //internal
  890. rho = 1.0;
  891. break;
  892. default:
  893. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  894. }
  895. }
  896.  
  897. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  898.  
  899. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  900. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  901. }
  902. }
  903.  
  904. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  905. MPI_COMM_WORLD);
  906.  
  907. //global_diff_norm = sqrt(global_diff_norm);
  908.  
  909.  
  910. return global_diff_norm;
  911. }
  912.  
  913. double GetCurrentNorm(){
  914. return current_norm;
  915. }
  916. };
  917.  
  918.  
  919.  
  920.  
  921. int main(int argc, char* argv[]){
  922.  
  923.  
  924. const double x_left = -2, x_right = 3;
  925. const double y_bottom = -1, y_top = 4;
  926.  
  927. double time_start, time_stop;
  928. int N, Gx, Gy;
  929. int dim[2], period[2];
  930.  
  931. MPI_Comm comm;
  932.  
  933. //N - global grid size
  934. N = atoi(argv[1]);
  935.  
  936. //Gx
  937. Gx = dim[0] = atoi(argv[2]);
  938.  
  939. //Gy
  940. Gy = dim[1] = atoi(argv[3]);
  941.  
  942.  
  943. stringstream ss;
  944. ss<< "./logs/out" << N << Gx*Gy << ".res";
  945.  
  946. string filename;
  947.  
  948. ss >> filename;
  949.  
  950. ofstream out(filename.c_str());
  951. streambuf *coutbuf = cout.rdbuf(); //save old buf
  952. cout.rdbuf(out.rdbuf());
  953.  
  954.  
  955. stringstream sserr;
  956. sserr<< "./logs/out" << N << Gx*Gy << ".err";
  957.  
  958. string filenameerr;
  959.  
  960. sserr >> filenameerr;
  961.  
  962. ofstream err(filenameerr.c_str());
  963. streambuf *cerrbuf = cerr.rdbuf(); //save old buf
  964. cerr.rdbuf(err.rdbuf());
  965.  
  966. period[0]=0;
  967. period[1]=0;
  968.  
  969.  
  970. MPI_Init(&argc, &argv);
  971.  
  972. time_start = MPI_Wtime();
  973.  
  974. int world_size;
  975.  
  976. int my_rank;
  977.  
  978. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  979. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  980.  
  981.  
  982. if(my_rank == 0){
  983. cout << "[INFO]: N = " << N << endl;
  984. cout << "[INFO]: Gx = " << dim[0] << endl;
  985. cout << "[INFO]: Gy = " << dim[1] << endl;
  986. }
  987.  
  988.  
  989.  
  990. if(argc <= 3){
  991. if(my_rank == 0)
  992. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy>" << endl;
  993.  
  994. MPI_Abort(MPI_COMM_WORLD, 1);
  995. }
  996.  
  997. if(Gx * Gy != world_size){
  998. if(my_rank == 0)
  999. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  1000.  
  1001. MPI_Abort(MPI_COMM_WORLD, 1);
  1002. }
  1003.  
  1004. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  1005.  
  1006. /*if(my_rank == 0)
  1007. cout << "[INFO]: Cart created"<<endl;
  1008. */
  1009.  
  1010. MPI_Comm_rank(comm, &my_rank);
  1011.  
  1012. int my_coords[2];
  1013.  
  1014. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  1015.  
  1016.  
  1017.  
  1018.  
  1019. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  1020.  
  1021.  
  1022. int iteration_num = 0;
  1023.  
  1024. bool should_i_stop = false;
  1025.  
  1026. while ( should_i_stop != true ){
  1027.  
  1028. w_func.DoIteration(should_i_stop);
  1029.  
  1030. if ( (my_rank == 0) && (iteration_num % 10000 == 0)){
  1031. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  1032. }
  1033.  
  1034. iteration_num++;
  1035. }
  1036.  
  1037. //MPI_Barrier(MPI_COMM_WORLD);
  1038.  
  1039. //vector <double> local_elements;
  1040.  
  1041. double comparing = w_func.CompareWithExact();
  1042.  
  1043. if (my_rank == 0)
  1044. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  1045.  
  1046.  
  1047.  
  1048. //myfile.close();
  1049.  
  1050. time_stop = MPI_Wtime();
  1051. if( my_rank == 0 )
  1052. cout << "Finished!" << endl
  1053. << "Total iterations: " << iteration_num << endl
  1054. << "Elapsed time: " << (time_stop - time_start) << endl
  1055. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  1056.  
  1057. MPI_Finalize();
  1058.  
  1059. cout.rdbuf(coutbuf); //reset to standard input again
  1060. cerr.rdbuf(cerrbuf); //reset to standard output again
  1061.  
  1062.  
  1063. return 0;
  1064. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement