Advertisement
manac68974

Untitled

Dec 18th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 29.98 KB | None | 0 0
  1. #include <mpi.h>
  2. #include <omp.h>
  3.  
  4. #include <iomanip>
  5. #include <iostream>
  6. #include <vector>
  7. #include <map>
  8. #include <cmath>
  9. #include <cstdlib>
  10. #include <algorithm>
  11. #include <fstream>
  12. #include<sstream>
  13.  
  14. using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27.  
  28. vector<double> internal_data;
  29. vector<double> old_internal_data;
  30. vector<double> external_data[4];
  31.  
  32. vector <vector <double>> coefs;
  33.  
  34. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  35.  
  36. //corners
  37. //left bottom corner
  38. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  39. return 2;
  40. }
  41.  
  42. //left top corner
  43. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  44. return 4;
  45. }
  46.  
  47. //right bottom corner
  48. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  49. return 6;
  50. }
  51.  
  52. //right top corner
  53. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  54. return 8;
  55. }
  56.  
  57. //sides
  58. //left side
  59. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  60. current_node_global_offset_x == 0){
  61. return 1;
  62. }
  63.  
  64. //right side
  65. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  66. current_node_global_offset_x == M){
  67. return 3;
  68. }
  69.  
  70. //bottom side
  71. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  72. current_node_global_offset_y == 0){
  73. return 5;
  74. }
  75.  
  76. //top side
  77. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  78. current_node_global_offset_y == N)){
  79. return 7;
  80. }
  81.  
  82. //internal
  83. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  84. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  85. return 0;
  86. }
  87.  
  88. return -1;
  89. }
  90.  
  91. double k(double x, double y) {
  92. return 1 + pow(x + y, 2);
  93. }
  94.  
  95. double q(double x, double y) {
  96. return 1;
  97. }
  98.  
  99. double u(double x, double y) {
  100. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  101. }
  102.  
  103. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  104. double psi_R(double y) {
  105. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  106. }
  107.  
  108. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  109. double psi_L(double y) {
  110. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  111. }
  112.  
  113. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  114. double psi_T(double x) {
  115. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  116. }
  117.  
  118. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  119. double psi_B(double x) {
  120. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  121. }
  122.  
  123. // right-part function of Poisson equation
  124. double F(double x, double y) {
  125. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  126. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  127. }
  128.  
  129. //inner_product(A[i], internal_data)
  130.  
  131. //inner_product(A[i], internal_data)
  132. void ComputeCoefsForNode (double &left_coeff, double &right_coeff, double &bottom_coeff, double &top_coeff, double &this_coeff, int i, int j){
  133.  
  134. int glob_x = GetGlobalX(i);
  135. int glob_y = GetGlobalY(j);
  136.  
  137. map <string,bool> neighbours = {
  138. {"left", true},
  139. {"right", true},
  140. {"bottom", true},
  141. {"top", true}
  142. };
  143.  
  144. left_coeff = 1.0;
  145. right_coeff = 1.0;
  146. bottom_coeff = 1.0;
  147. top_coeff = 1.0;
  148. this_coeff = 1.0;
  149.  
  150. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  151. case 2:
  152. //left bottom corner
  153. neighbours["left"] = false;
  154. neighbours["bottom"] = false;
  155. break;
  156. case 4:
  157. //left top corner
  158. neighbours["left"] = false;
  159. neighbours["top"] = false;
  160. break;
  161. case 6:
  162. //right bottom corner
  163. neighbours["right"] = false;
  164. neighbours["bottom"] = false;
  165. break;
  166. case 8:
  167. //right top corner
  168. neighbours["right"] = false;
  169. neighbours["top"] = false;
  170. break;
  171. case 1:
  172. //left side
  173. neighbours["left"] = false;
  174. break;
  175. case 3:
  176. //right side
  177. neighbours["right"] = false;
  178. break;
  179. case 5:
  180. //bottom side
  181. neighbours["bottom"] = false;
  182. break;
  183. case 7:
  184. //top side
  185. neighbours["top"] = false;
  186. break;
  187. case 0:
  188. //internal
  189. break;
  190. default:
  191. cout << "[ERROR]: Bad global coords compute coef. Global:" << glob_x << " " << glob_y<<endl;
  192. }
  193.  
  194. if (!neighbours["left"]){
  195. right_coeff = 2.0;
  196. left_coeff = 0.0;
  197. }
  198.  
  199. if (!neighbours["right"]){
  200. left_coeff = 2.0;
  201. right_coeff = 0.0;
  202. }
  203.  
  204. if (!neighbours["bottom"]){
  205. top_coeff = 2.0;
  206. bottom_coeff = 0.0;
  207. }
  208.  
  209. if (!neighbours["top"]){
  210. bottom_coeff = 2.0;
  211. top_coeff = 0.0;
  212. }
  213.  
  214. if (neighbours["left"]){
  215. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  216. }
  217.  
  218. if (neighbours["right"]){
  219. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  220. }
  221.  
  222. if (neighbours["bottom"]){
  223. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  224. }
  225.  
  226. if (neighbours["top"]){
  227. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  228. }
  229.  
  230. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  231.  
  232. /*coefs[0] = left_coeff;
  233. coefs[1] = right_coeff;
  234. coefs[2] = bottom_coeff;
  235. coefs[3] = top_coeff;
  236. coefs[4] = this_coeff;*/
  237. }
  238.  
  239. double ComputeMagicInnerProductA_iw (int i, int j){
  240.  
  241. int glob_x = GetGlobalX(i);
  242. int glob_y = GetGlobalY(j);
  243.  
  244. double result = 0.0;
  245.  
  246. map <string,bool> neighbours = {
  247. {"left", true},
  248. {"right", true},
  249. {"bottom", true},
  250. {"top", true}
  251. };
  252.  
  253. vector <double> my_coefs = coefs[j + cur_block_size_y*i];
  254.  
  255. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  256. double left_coeff = my_coefs[0], right_coeff = my_coefs[1], bottom_coeff = my_coefs[2], top_coeff = my_coefs[3], this_coeff = my_coefs[4];
  257.  
  258. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  259. case 2:
  260. //left bottom corner
  261. neighbours["left"] = false;
  262. neighbours["bottom"] = false;
  263. break;
  264. case 4:
  265. //left top corner
  266. neighbours["left"] = false;
  267. neighbours["top"] = false;
  268. break;
  269. case 6:
  270. //right bottom corner
  271. neighbours["right"] = false;
  272. neighbours["bottom"] = false;
  273. break;
  274. case 8:
  275. //right top corner
  276. neighbours["right"] = false;
  277. neighbours["top"] = false;
  278. break;
  279. case 1:
  280. //left side
  281. neighbours["left"] = false;
  282. break;
  283. case 3:
  284. //right side
  285. neighbours["right"] = false;
  286. break;
  287. case 5:
  288. //bottom side
  289. neighbours["bottom"] = false;
  290. break;
  291. case 7:
  292. //top side
  293. neighbours["top"] = false;
  294. break;
  295. case 0:
  296. //internal
  297. break;
  298. default:
  299. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  300. }
  301.  
  302.  
  303.  
  304. if (neighbours["left"]){
  305. left_neighbour = Get(glob_x - 1, glob_y);
  306. }
  307.  
  308. if (neighbours["right"]){
  309. right_neighbour = Get(glob_x + 1, glob_y);
  310. }
  311.  
  312. if (neighbours["bottom"]){
  313. bottom_neighbour = Get(glob_x, glob_y - 1);
  314. }
  315.  
  316. if (neighbours["top"]){
  317. top_neighbour = Get(glob_x, glob_y + 1);
  318. }
  319.  
  320. this_node = Get(glob_x, glob_y);
  321.  
  322. result = left_coeff * left_neighbour +
  323. right_coeff * right_neighbour +
  324. bottom_coeff * bottom_neighbour +
  325. top_coeff * top_neighbour +
  326. this_coeff * this_node;
  327.  
  328. return result;
  329. }
  330.  
  331. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  332.  
  333. int glob_x = current_node_global_offset_x;
  334. int glob_y = current_node_global_offset_y;
  335.  
  336. double result = 0.0;
  337.  
  338. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  339. case 2:
  340. //left bottom corner
  341. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  342. break;
  343. case 4:
  344. //left top corner
  345. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  346. break;
  347. case 6:
  348. //right bottom corner
  349. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  350. break;
  351. case 8:
  352. //right top corner
  353. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  354. break;
  355. case 1:
  356. //left side
  357. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  358. break;
  359. case 3:
  360. //right side
  361. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  362. break;
  363. case 5:
  364. //bottom side
  365. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  366. break;
  367. case 7:
  368. //top side
  369. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  370. break;
  371. case 0:
  372. //internal
  373. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  374. break;
  375. default:
  376. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  377.  
  378. }
  379.  
  380. return result;
  381.  
  382. }
  383.  
  384. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  385.  
  386. int glob_x = current_node_global_offset_x;
  387. int glob_y = current_node_global_offset_y;
  388.  
  389. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  390.  
  391. }
  392.  
  393. void ComputeMatrixR(){
  394. /*if(my_rank == 0)
  395. cout << "[INFO]: Computation of matrix r started"<<endl;
  396. */
  397.  
  398. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  399.  
  400. //#pragma omp parallel for
  401. for(int i = 0; i < cur_block_size_x; ++i){
  402. for(int j = 0; j < cur_block_size_y; ++j){
  403.  
  404. int current_node_global_offset_x = GetGlobalX(i),
  405. current_node_global_offset_y = GetGlobalY(j);
  406.  
  407. int glob_x = current_node_global_offset_x,
  408. glob_y = current_node_global_offset_y;
  409.  
  410. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(i,j) - GetNodeFromB(glob_x, glob_y);
  411.  
  412. }
  413. }
  414.  
  415. //#pragma omp parallel for
  416. for(int i = 0; i < cur_block_size_x; ++i){
  417. for(int j = 0; j < cur_block_size_y; ++j){
  418.  
  419. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  420. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  421.  
  422. }
  423. }
  424.  
  425. //old_internal_data = internal_data;
  426. //internal_data = r_tmp_matrix;
  427.  
  428. SyncMPI();
  429.  
  430. }
  431.  
  432. double ComputeTauAndStopCase(bool &should_i_stop){
  433.  
  434. double local_Ar_r_inner_product_sum = 0.0;
  435. double local_Ar_Ar_inner_product_sum = 0.0;
  436. double global_Ar_r_inner_product_sum = 0.0;
  437. double global_Ar_Ar_inner_product_sum = 0.0;
  438. double local_r_norm = 0.0;
  439. double global_r_norm = 0.0;
  440.  
  441. //#pragma omp parallel for
  442. for(int i = 0; i < cur_block_size_x; ++i){
  443. for(int j = 0; j < cur_block_size_y; ++j){
  444. double rho = 1.0;
  445.  
  446. int current_node_global_offset_x = GetGlobalX(i),
  447. current_node_global_offset_y = GetGlobalY(j);
  448.  
  449. int glob_x = current_node_global_offset_x,
  450. glob_y = current_node_global_offset_y;
  451.  
  452. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(i, j);
  453.  
  454. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  455. case 2:
  456. case 4:
  457. case 6:
  458. case 8:
  459. //angle
  460. rho = 0.25;
  461. break;
  462. case 1:
  463. case 3:
  464. case 5:
  465. case 7:
  466. //side
  467. rho = 0.5;
  468. break;
  469. case 0:
  470. //internal
  471. rho = 1.0;
  472. break;
  473. default:
  474. cout << "[ERROR]: Bad global coords compute tau. Global:" << glob_x << " " << glob_y << endl;
  475. }
  476.  
  477. double tmp_cur_node_value = Get(glob_x, glob_y);
  478.  
  479. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  480. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  481. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  482.  
  483. }
  484. }
  485.  
  486. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  487.  
  488. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  489. comm);
  490.  
  491. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  492.  
  493. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  494. MPI_COMM_WORLD);
  495.  
  496. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  497.  
  498. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  499.  
  500. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  501. MPI_COMM_WORLD);
  502.  
  503. double eps = 1e-06;
  504.  
  505. if (global_r_norm < 0){
  506. cout << "[ERROR]: bad global r norm" << endl;
  507. }
  508.  
  509. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  510.  
  511. //if (my_rank == 0)
  512. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  513.  
  514. if (current_norm <= eps){
  515. if (my_rank == 0)
  516. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  517. should_i_stop = true;
  518. }else{
  519. should_i_stop = false;
  520. }
  521.  
  522. return global_tau;
  523.  
  524. }
  525.  
  526.  
  527. void ComputeNewW(double tau){
  528. //#pragma omp parallel for
  529. for(int i = 0; i < cur_block_size_x; ++i){
  530. for(int j = 0; j < cur_block_size_y; ++j){
  531. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  532. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  533. }
  534. }
  535. }
  536.  
  537. int GetGlobalX(int i){
  538. return cur_block_global_offset_x + i;
  539. }
  540.  
  541. int GetGlobalY(int j){
  542. return cur_block_global_offset_y + j;
  543. }
  544.  
  545. public:
  546. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  547.  
  548. M = inpM;
  549. N = inpN;
  550.  
  551. Gx = inpGx;
  552. Gy = inpGy;
  553.  
  554. x_left = inpx_left;
  555. x_right = inpx_right;
  556. y_bottom = inpy_bottom;
  557. y_top = inpy_top;
  558.  
  559. h_x = double((x_right - x_left)) / M;
  560. h_y = double((y_top - y_bottom)) / N;
  561.  
  562. my_rank = inprank;
  563. comm = inpcomm;
  564.  
  565. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  566. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  567.  
  568. global_block_size_x = (M + 1) / Gx;
  569. global_block_size_y = (N + 1) / Gy;
  570.  
  571. cur_block_size_x = global_block_size_x;
  572. cur_block_size_y = global_block_size_y;
  573.  
  574. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  575. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  576.  
  577. //if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  578. if(cur_block_global_coors_x == Gx - 1 ){
  579. cur_block_size_x += (M + 1) % Gx;
  580. }
  581.  
  582. //if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  583. if(cur_block_global_coors_y == Gy - 1){
  584. cur_block_size_y += (N + 1) % Gy;
  585. }
  586.  
  587. internal_data.resize(cur_block_size_x * cur_block_size_y);
  588. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  589.  
  590.  
  591.  
  592. vector <vector< double>> tmp_coefs(cur_block_size_x * cur_block_size_y, vector<double>(5, 0.0));
  593.  
  594. coefs = tmp_coefs;
  595.  
  596. for(int i = 0; i < cur_block_size_x; i++){
  597. for (int j = 0; j < cur_block_size_y; j++){
  598. vector <double> &my_coefs = coefs[j + cur_block_size_y*i];
  599. ComputeCoefsForNode(my_coefs[0], my_coefs[1], my_coefs[2], my_coefs[3], my_coefs[4], i, j);
  600. }
  601. }
  602.  
  603. //OX
  604. external_data[0].resize(cur_block_size_y);
  605. external_data[1].resize(cur_block_size_y);
  606.  
  607. //OY
  608. external_data[2].resize(cur_block_size_x);
  609. external_data[3].resize(cur_block_size_x);
  610.  
  611. }
  612.  
  613. double Get(int i, int j) {
  614. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  615. }
  616.  
  617. void Set(int i, int j, double v) {
  618. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  619. }
  620.  
  621. void SyncMPI(){
  622.  
  623. /*if(my_rank == 0)
  624. cout << "[INFO]: Sync started"<< endl;
  625. */
  626.  
  627. //MPI_Barrier(MPI_COMM_WORLD);
  628.  
  629. //left and right sides
  630. //#pragma omp parallel for
  631. for(int j = 0; j < cur_block_size_y; ++j){
  632.  
  633. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  634. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  635.  
  636. }
  637.  
  638. //bottom and top sides
  639. //#pragma omp parallel for
  640. for(int i = 0; i < cur_block_size_x; ++i){
  641.  
  642. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  643. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  644.  
  645. }
  646.  
  647. int my_coords[2];
  648. int targets_ranks[4];
  649.  
  650. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  651.  
  652. int neighbour_offsets[ 4 ][ 2 ] = {
  653. { -1, 0 },{ 1, 0 },
  654. { 0, -1 },{ 0, 1 }
  655. };
  656.  
  657. //#pragma omp parallel for
  658. for(int i = 0; i < 4; i++){
  659.  
  660. int target_coords[2];
  661.  
  662. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  663. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  664.  
  665. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  666.  
  667. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  668.  
  669. }
  670. else{
  671. targets_ranks[i] = -1;
  672. }
  673. }
  674.  
  675. //Now we have rank for all targets
  676.  
  677. for(int axis = 0; axis < 2; axis++){
  678.  
  679. int parity_bit = (my_coords[ axis ]) % 2;
  680.  
  681. //if parity_bit == 0 then
  682. // zuerst mit links, dann mit rechts tauschen
  683. //elif parity_bit == 1:
  684. // zuerst mit rechts, dann mit links tauschen
  685.  
  686. for(int tmp = 0; tmp < 2; tmp++){
  687. parity_bit = 1 - parity_bit;
  688.  
  689. //target id in external_data und targets_ranks
  690. int target_idx = 2 * axis + (1 - parity_bit);
  691.  
  692. if (targets_ranks[target_idx] != -1){
  693.  
  694. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  695. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  696.  
  697. MPI_Status tmp_status;
  698.  
  699. if(my_rank != targets_ranks[ target_idx ]){
  700.  
  701. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  702. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  703. comm, &tmp_status);
  704.  
  705. }
  706. }
  707. }
  708. }
  709.  
  710. //MPI_Barrier(MPI_COMM_WORLD);
  711.  
  712. }
  713.  
  714. void DoIteration(bool &should_i_stop){
  715.  
  716. ComputeMatrixR();
  717.  
  718. //Now R Matrix is in internal_data
  719.  
  720. double tau = ComputeTauAndStopCase(should_i_stop);
  721.  
  722. //We have in our block jetzt:
  723. //in internal_data: R Matrix
  724. //in old_internal_data: w aus letzte iteration
  725. //and we have tau
  726. //jetzt koennen wir naechste w finden
  727.  
  728. ComputeNewW(tau);
  729.  
  730. SyncMPI();
  731.  
  732. }
  733.  
  734. double GetLocalIndex(int i, int j){
  735. //internal data
  736. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  737. return internal_data[ j + cur_block_size_y*i ];
  738. }
  739.  
  740. //external data
  741. //OX
  742. if((j >= 0) && (j < cur_block_size_y)){
  743.  
  744. if (i == -1)
  745. return external_data[ 0 ][ j ];
  746.  
  747. if (i == cur_block_size_x)
  748. return external_data[ 1 ][ j ];
  749.  
  750. }
  751.  
  752. //OY
  753. if((i >= 0) && (i < cur_block_size_x)){
  754.  
  755. if (j == -1)
  756. return external_data[ 2 ][ i ];
  757. if (j == cur_block_size_y)
  758. return external_data[ 3 ][ i ];
  759.  
  760. }
  761.  
  762. cout << "[ERROR]: bad local index" << endl;
  763.  
  764. return nan("");
  765. }
  766.  
  767. void SetLocalIndex(int i, int j, double v){
  768. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  769. internal_data[ j + cur_block_size_y*i ] = v;
  770. }else{
  771. cout << "[ERROR]: trying to set data outside the local area" << endl;
  772. }
  773. }
  774.  
  775. double CompareWithExact() {
  776.  
  777. double local_diff_norm = 0.0;
  778. double global_diff_norm = 0.0;
  779. /*
  780. if (my_rank == 0)
  781. cout << "[INFO]: Starting computing compare with exact" << endl;
  782. */
  783. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  784.  
  785. //#pragma omp parallel for
  786. for (int i = 0; i < cur_block_size_x; ++i){
  787. for (int j = 0; j < cur_block_size_y; ++j){
  788.  
  789.  
  790. int current_node_global_offset_x = GetGlobalX(i),
  791. current_node_global_offset_y = GetGlobalY(j);
  792.  
  793. int glob_x = current_node_global_offset_x,
  794. glob_y = current_node_global_offset_y;
  795.  
  796. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  797.  
  798. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  799.  
  800. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  801.  
  802. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  803.  
  804. }
  805. }
  806.  
  807. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  808.  
  809.  
  810. for (int i = 0; i < cur_block_size_x; ++i){
  811. for (int j = 0; j < cur_block_size_y; ++j){
  812.  
  813. double rho = 1.0;
  814.  
  815. int current_node_global_offset_x = GetGlobalX(i),
  816. current_node_global_offset_y = GetGlobalY(j);
  817.  
  818. int glob_x = current_node_global_offset_x,
  819. glob_y = current_node_global_offset_y;
  820.  
  821. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  822. case 2:
  823. case 4:
  824. case 6:
  825. case 8:
  826. //angle
  827. rho = 0.25;
  828. break;
  829. case 1:
  830. case 3:
  831. case 5:
  832. case 7:
  833. //side
  834. rho = 0.5;
  835. break;
  836. case 0:
  837. //internal
  838. rho = 1.0;
  839. break;
  840. default:
  841. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  842. }
  843.  
  844.  
  845. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  846.  
  847. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  848. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  849. }
  850. }
  851.  
  852. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  853. MPI_COMM_WORLD);
  854.  
  855. //global_diff_norm = sqrt(global_diff_norm);
  856.  
  857.  
  858. return global_diff_norm;
  859. }
  860.  
  861. double GetCurrentNorm(){
  862. return current_norm;
  863. }
  864. };
  865.  
  866.  
  867.  
  868.  
  869. int main(int argc, char* argv[]){
  870.  
  871. const double x_left = -2, x_right = 3;
  872. const double y_bottom = -1, y_top = 4;
  873.  
  874. double time_start, time_stop;
  875. int N, Gx, Gy;
  876. int dim[2], period[2];
  877.  
  878. MPI_Comm comm;
  879.  
  880. //N - global grid size
  881. N = atoi(argv[1]);
  882.  
  883. //Gx
  884. Gx = dim[0] = atoi(argv[2]);
  885.  
  886. //Gy
  887. Gy = dim[1] = atoi(argv[3]);
  888.  
  889. period[0]=0;
  890. period[1]=0;
  891.  
  892.  
  893. MPI_Init(&argc, &argv);
  894.  
  895. time_start = MPI_Wtime();
  896.  
  897. int world_size;
  898.  
  899. int my_rank;
  900.  
  901. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  902. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  903.  
  904.  
  905. if(my_rank == 0){
  906. cout << "[INFO]: N = " << N << endl;
  907. cout << "[INFO]: Gx = " << dim[0] << endl;
  908. cout << "[INFO]: Gy = " << dim[1] << endl;
  909. }
  910.  
  911.  
  912.  
  913. if(argc <= 3){
  914. if(my_rank == 0)
  915. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy>" << endl;
  916.  
  917. MPI_Abort(MPI_COMM_WORLD, 1);
  918. }
  919.  
  920. if(Gx * Gy != world_size){
  921. if(my_rank == 0)
  922. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  923.  
  924. MPI_Abort(MPI_COMM_WORLD, 1);
  925. }
  926.  
  927. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  928.  
  929. /*if(my_rank == 0)
  930. cout << "[INFO]: Cart created"<<endl;
  931. */
  932.  
  933. MPI_Comm_rank(comm, &my_rank);
  934.  
  935. int my_coords[2];
  936.  
  937. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  938.  
  939.  
  940.  
  941.  
  942. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  943.  
  944.  
  945. int iteration_num = 0;
  946.  
  947. bool should_i_stop = false;
  948.  
  949. while ( should_i_stop != true ){
  950.  
  951. w_func.DoIteration(should_i_stop);
  952.  
  953. if ( (my_rank == 0) && (iteration_num % 10000 == 0) ){
  954. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  955. }
  956.  
  957. iteration_num++;
  958. }
  959.  
  960. //MPI_Barrier(MPI_COMM_WORLD);
  961.  
  962. //vector <double> local_elements;
  963.  
  964. double comparing = w_func.CompareWithExact();
  965.  
  966. if (my_rank == 0)
  967. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  968.  
  969.  
  970.  
  971. //myfile.close();
  972.  
  973. time_stop = MPI_Wtime();
  974. if( my_rank == 0 )
  975. cout << "Finished!" << endl
  976. << "Total iterations: " << iteration_num << endl
  977. << "Elapsed time: " << (time_stop - time_start) << endl
  978. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  979.  
  980. MPI_Finalize();
  981.  
  982. return 0;
  983. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement