Advertisement
manac68974

Untitled

Dec 18th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 27.14 KB | None | 0 0
  1. #include <mpi.h>
  2. #include <omp.h>
  3.  
  4. #include <iomanip>
  5. #include <iostream>
  6. #include <vector>
  7. #include <map>
  8. #include <cmath>
  9. #include <cstdlib>
  10. #include <algorithm>
  11. #include <fstream>
  12. #include<sstream>
  13.  
  14. using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27.  
  28. vector<double> internal_data;
  29. vector<double> old_internal_data;
  30. vector<double> external_data[4];
  31.  
  32. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  33.  
  34. //corners
  35. //left bottom corner
  36. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  37. return 2;
  38. }
  39.  
  40. //left top corner
  41. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  42. return 4;
  43. }
  44.  
  45. //right bottom corner
  46. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  47. return 6;
  48. }
  49.  
  50. //right top corner
  51. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  52. return 8;
  53. }
  54.  
  55. //sides
  56. //left side
  57. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  58. current_node_global_offset_x == 0){
  59. return 1;
  60. }
  61.  
  62. //right side
  63. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  64. current_node_global_offset_x == M){
  65. return 3;
  66. }
  67.  
  68. //bottom side
  69. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  70. current_node_global_offset_y == 0){
  71. return 5;
  72. }
  73.  
  74. //top side
  75. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  76. current_node_global_offset_y == N)){
  77. return 7;
  78. }
  79.  
  80. //internal
  81. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  82. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  83. return 0;
  84. }
  85.  
  86. return -1;
  87. }
  88.  
  89. double k(double x, double y) {
  90. return 1 + pow(x + y, 2);
  91. }
  92.  
  93. double q(double x, double y) {
  94. return 1;
  95. }
  96.  
  97. double u(double x, double y) {
  98. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  99. }
  100.  
  101. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  102. double psi_R(double y) {
  103. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  104. }
  105.  
  106. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  107. double psi_L(double y) {
  108. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  109. }
  110.  
  111. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  112. double psi_T(double x) {
  113. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  114. }
  115.  
  116. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  117. double psi_B(double x) {
  118. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  119. }
  120.  
  121. // right-part function of Poisson equation
  122. double F(double x, double y) {
  123. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  124. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  125. }
  126.  
  127. //inner_product(A[i], internal_data)
  128. double ComputeMagicInnerProductA_iw (int current_node_global_offset_x, int current_node_global_offset_y){
  129.  
  130. int glob_x = current_node_global_offset_x;
  131. int glob_y = current_node_global_offset_y;
  132.  
  133. double result = 0.0;
  134.  
  135. map <string,bool> neighbours = {
  136. {"left", true},
  137. {"right", true},
  138. {"bottom", true},
  139. {"top", true}
  140. };
  141.  
  142. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  143. double left_coeff = 1.0, right_coeff = 1.0, bottom_coeff = 1.0, top_coeff = 1.0, this_coeff = 1.0;
  144.  
  145. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  146. case 2:
  147. //left bottom corner
  148. neighbours["left"] = false;
  149. neighbours["bottom"] = false;
  150. break;
  151. case 4:
  152. //left top corner
  153. neighbours["left"] = false;
  154. neighbours["top"] = false;
  155. break;
  156. case 6:
  157. //right bottom corner
  158. neighbours["right"] = false;
  159. neighbours["bottom"] = false;
  160. break;
  161. case 8:
  162. //right top corner
  163. neighbours["right"] = false;
  164. neighbours["top"] = false;
  165. break;
  166. case 1:
  167. //left side
  168. neighbours["left"] = false;
  169. break;
  170. case 3:
  171. //right side
  172. neighbours["right"] = false;
  173. break;
  174. case 5:
  175. //bottom side
  176. neighbours["bottom"] = false;
  177. break;
  178. case 7:
  179. //top side
  180. neighbours["top"] = false;
  181. break;
  182. case 0:
  183. //internal
  184. break;
  185. default:
  186. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  187. }
  188.  
  189. if (!neighbours["left"]){
  190. right_coeff = 2.0;
  191. left_coeff = 0.0;
  192. }
  193.  
  194. if (!neighbours["right"]){
  195. left_coeff = 2.0;
  196. right_coeff = 0.0;
  197. }
  198.  
  199. if (!neighbours["bottom"]){
  200. top_coeff = 2.0;
  201. bottom_coeff = 0.0;
  202. }
  203.  
  204. if (!neighbours["top"]){
  205. bottom_coeff = 2.0;
  206. top_coeff = 0.0;
  207. }
  208.  
  209.  
  210.  
  211. if (neighbours["left"]){
  212. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  213. left_neighbour = Get(glob_x - 1, glob_y);
  214. }
  215.  
  216. if (neighbours["right"]){
  217. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  218. right_neighbour = Get(glob_x + 1, glob_y);
  219. }
  220.  
  221. if (neighbours["bottom"]){
  222. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  223. bottom_neighbour = Get(glob_x, glob_y - 1);
  224. }
  225.  
  226. if (neighbours["top"]){
  227. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  228. top_neighbour = Get(glob_x, glob_y + 1);
  229. }
  230.  
  231. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  232. this_node = Get(glob_x, glob_y);
  233.  
  234. result = left_coeff * left_neighbour +
  235. right_coeff * right_neighbour +
  236. bottom_coeff * bottom_neighbour +
  237. top_coeff * top_neighbour +
  238. this_coeff * this_node;
  239.  
  240. return result;
  241. }
  242.  
  243. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  244.  
  245. int glob_x = current_node_global_offset_x;
  246. int glob_y = current_node_global_offset_y;
  247.  
  248. double result = 0.0;
  249.  
  250. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  251. case 2:
  252. //left bottom corner
  253. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  254. break;
  255. case 4:
  256. //left top corner
  257. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  258. break;
  259. case 6:
  260. //right bottom corner
  261. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  262. break;
  263. case 8:
  264. //right top corner
  265. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  266. break;
  267. case 1:
  268. //left side
  269. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  270. break;
  271. case 3:
  272. //right side
  273. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  274. break;
  275. case 5:
  276. //bottom side
  277. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  278. break;
  279. case 7:
  280. //top side
  281. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  282. break;
  283. case 0:
  284. //internal
  285. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  286. break;
  287. default:
  288. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  289.  
  290. }
  291.  
  292. return result;
  293.  
  294. }
  295.  
  296. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  297.  
  298. int glob_x = current_node_global_offset_x;
  299. int glob_y = current_node_global_offset_y;
  300.  
  301. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  302.  
  303. }
  304.  
  305. void ComputeMatrixR(){
  306. /*if(my_rank == 0)
  307. cout << "[INFO]: Computation of matrix r started"<<endl;
  308. */
  309.  
  310. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  311.  
  312. //#pragma omp parallel for
  313. for(int i = 0; i < cur_block_size_x; ++i){
  314. for(int j = 0; j < cur_block_size_y; ++j){
  315.  
  316. int current_node_global_offset_x = GetGlobalX(i),
  317. current_node_global_offset_y = GetGlobalY(j);
  318.  
  319. int glob_x = current_node_global_offset_x,
  320. glob_y = current_node_global_offset_y;
  321.  
  322. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(glob_x,glob_y) - GetNodeFromB(glob_x, glob_y);
  323.  
  324. }
  325. }
  326.  
  327. //#pragma omp parallel for
  328. for(int i = 0; i < cur_block_size_x; ++i){
  329. for(int j = 0; j < cur_block_size_y; ++j){
  330.  
  331. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  332. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  333.  
  334. }
  335. }
  336.  
  337. //old_internal_data = internal_data;
  338. //internal_data = r_tmp_matrix;
  339.  
  340. SyncMPI();
  341.  
  342. }
  343.  
  344. double ComputeTauAndStopCase(bool &should_i_stop){
  345.  
  346. double local_Ar_r_inner_product_sum = 0.0;
  347. double local_Ar_Ar_inner_product_sum = 0.0;
  348. double global_Ar_r_inner_product_sum = 0.0;
  349. double global_Ar_Ar_inner_product_sum = 0.0;
  350. double local_r_norm = 0.0;
  351. double global_r_norm = 0.0;
  352.  
  353. //#pragma omp parallel for
  354. for(int i = 0; i < cur_block_size_x; ++i){
  355. for(int j = 0; j < cur_block_size_y; ++j){
  356. double rho = 1.0;
  357.  
  358. int current_node_global_offset_x = GetGlobalX(i),
  359. current_node_global_offset_y = GetGlobalY(j);
  360.  
  361. int glob_x = current_node_global_offset_x,
  362. glob_y = current_node_global_offset_y;
  363.  
  364. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(glob_x, glob_y);
  365.  
  366. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  367. case 2:
  368. case 4:
  369. case 6:
  370. case 8:
  371. //angle
  372. rho = 0.25;
  373. break;
  374. case 1:
  375. case 3:
  376. case 5:
  377. case 7:
  378. //side
  379. rho = 0.5;
  380. break;
  381. case 0:
  382. //internal
  383. rho = 1.0;
  384. break;
  385. default:
  386. cout << "[ERROR]: Bad global coords compute tau. Global:" << glob_x << " " << glob_y << endl;
  387. }
  388.  
  389. double tmp_cur_node_value = Get(glob_x, glob_y);
  390.  
  391. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  392. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  393. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  394.  
  395. }
  396. }
  397.  
  398. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  399.  
  400. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  401. comm);
  402.  
  403. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  404.  
  405. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  406. MPI_COMM_WORLD);
  407.  
  408. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  409.  
  410. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  411.  
  412. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  413. MPI_COMM_WORLD);
  414.  
  415. double eps = 1e-06;
  416.  
  417. if (global_r_norm < 0){
  418. cout << "[ERROR]: bad global r norm" << endl;
  419. }
  420.  
  421. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  422.  
  423. //if (my_rank == 0)
  424. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  425.  
  426. if (current_norm <= eps){
  427. if (my_rank == 0)
  428. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  429. should_i_stop = true;
  430. }else{
  431. should_i_stop = false;
  432. }
  433.  
  434. return global_tau;
  435.  
  436. }
  437.  
  438.  
  439. void ComputeNewW(double tau){
  440. //#pragma omp parallel for
  441. for(int i = 0; i < cur_block_size_x; ++i){
  442. for(int j = 0; j < cur_block_size_y; ++j){
  443. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  444. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  445. }
  446. }
  447. }
  448.  
  449. int GetGlobalX(int i){
  450. return cur_block_global_offset_x + i;
  451. }
  452.  
  453. int GetGlobalY(int j){
  454. return cur_block_global_offset_y + j;
  455. }
  456.  
  457. public:
  458. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  459.  
  460. M = inpM;
  461. N = inpN;
  462.  
  463. Gx = inpGx;
  464. Gy = inpGy;
  465.  
  466. x_left = inpx_left;
  467. x_right = inpx_right;
  468. y_bottom = inpy_bottom;
  469. y_top = inpy_top;
  470.  
  471. h_x = double((x_right - x_left)) / M;
  472. h_y = double((y_top - y_bottom)) / N;
  473.  
  474. my_rank = inprank;
  475. comm = inpcomm;
  476.  
  477. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  478. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  479.  
  480. global_block_size_x = (M + 1) / Gx;
  481. global_block_size_y = (N + 1) / Gy;
  482.  
  483. cur_block_size_x = global_block_size_x;
  484. cur_block_size_y = global_block_size_y;
  485.  
  486. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  487. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  488.  
  489. //if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  490. if(cur_block_global_coors_x == Gx - 1 ){
  491. cur_block_size_x += (M + 1) % Gx;
  492. }
  493.  
  494. //if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  495. if(cur_block_global_coors_y == Gy - 1){
  496. cur_block_size_y += (N + 1) % Gy;
  497. }
  498.  
  499. internal_data.resize(cur_block_size_x * cur_block_size_y);
  500. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  501.  
  502.  
  503. //OX
  504. external_data[0].resize(cur_block_size_y);
  505. external_data[1].resize(cur_block_size_y);
  506.  
  507. //OY
  508. external_data[2].resize(cur_block_size_x);
  509. external_data[3].resize(cur_block_size_x);
  510.  
  511. }
  512.  
  513. double Get(int i, int j) {
  514. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  515. }
  516.  
  517. void Set(int i, int j, double v) {
  518. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  519. }
  520.  
  521. void SyncMPI(){
  522.  
  523. /*if(my_rank == 0)
  524. cout << "[INFO]: Sync started"<< endl;
  525. */
  526.  
  527. //MPI_Barrier(MPI_COMM_WORLD);
  528.  
  529. //left and right sides
  530. //#pragma omp parallel for
  531. for(int j = 0; j < cur_block_size_y; ++j){
  532.  
  533. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  534. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  535.  
  536. }
  537.  
  538. //bottom and top sides
  539. //#pragma omp parallel for
  540. for(int i = 0; i < cur_block_size_x; ++i){
  541.  
  542. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  543. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  544.  
  545. }
  546.  
  547. int my_coords[2];
  548. int targets_ranks[4];
  549.  
  550. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  551.  
  552. int neighbour_offsets[ 4 ][ 2 ] = {
  553. { -1, 0 },{ 1, 0 },
  554. { 0, -1 },{ 0, 1 }
  555. };
  556.  
  557. //#pragma omp parallel for
  558. for(int i = 0; i < 4; i++){
  559.  
  560. int target_coords[2];
  561.  
  562. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  563. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  564.  
  565. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  566.  
  567. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  568.  
  569. }
  570. else{
  571. targets_ranks[i] = -1;
  572. }
  573. }
  574.  
  575. //Now we have rank for all targets
  576.  
  577. for(int axis = 0; axis < 2; axis++){
  578.  
  579. int parity_bit = (my_coords[ axis ]) % 2;
  580.  
  581. //if parity_bit == 0 then
  582. // zuerst mit links, dann mit rechts tauschen
  583. //elif parity_bit == 1:
  584. // zuerst mit rechts, dann mit links tauschen
  585.  
  586. for(int tmp = 0; tmp < 2; tmp++){
  587. parity_bit = 1 - parity_bit;
  588.  
  589. //target id in external_data und targets_ranks
  590. int target_idx = 2 * axis + (1 - parity_bit);
  591.  
  592. if (targets_ranks[target_idx] != -1){
  593.  
  594. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  595. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  596.  
  597. MPI_Status tmp_status;
  598.  
  599. if(my_rank != targets_ranks[ target_idx ]){
  600.  
  601. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  602. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  603. comm, &tmp_status);
  604.  
  605. }
  606. }
  607. }
  608. }
  609.  
  610. //MPI_Barrier(MPI_COMM_WORLD);
  611.  
  612. }
  613.  
  614. void DoIteration(bool &should_i_stop){
  615.  
  616. ComputeMatrixR();
  617.  
  618. //Now R Matrix is in internal_data
  619.  
  620. double tau = ComputeTauAndStopCase(should_i_stop);
  621.  
  622. //We have in our block jetzt:
  623. //in internal_data: R Matrix
  624. //in old_internal_data: w aus letzte iteration
  625. //and we have tau
  626. //jetzt koennen wir naechste w finden
  627.  
  628. ComputeNewW(tau);
  629.  
  630. SyncMPI();
  631.  
  632. }
  633.  
  634. double GetLocalIndex(int i, int j){
  635. //internal data
  636. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  637. return internal_data[ j + cur_block_size_y*i ];
  638. }
  639.  
  640. //external data
  641. //OX
  642. if((j >= 0) && (j < cur_block_size_y)){
  643.  
  644. if (i == -1)
  645. return external_data[ 0 ][ j ];
  646.  
  647. if (i == cur_block_size_x)
  648. return external_data[ 1 ][ j ];
  649.  
  650. }
  651.  
  652. //OY
  653. if((i >= 0) && (i < cur_block_size_x)){
  654.  
  655. if (j == -1)
  656. return external_data[ 2 ][ i ];
  657. if (j == cur_block_size_y)
  658. return external_data[ 3 ][ i ];
  659.  
  660. }
  661.  
  662. cout << "[ERROR]: bad local index" << endl;
  663.  
  664. return nan("");
  665. }
  666.  
  667. void SetLocalIndex(int i, int j, double v){
  668. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  669. internal_data[ j + cur_block_size_y*i ] = v;
  670. }else{
  671. cout << "[ERROR]: trying to set data outside the local area" << endl;
  672. }
  673. }
  674.  
  675. double CompareWithExact() {
  676.  
  677. double local_diff_norm = 0.0;
  678. double global_diff_norm = 0.0;
  679. /*
  680. if (my_rank == 0)
  681. cout << "[INFO]: Starting computing compare with exact" << endl;
  682. */
  683. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  684.  
  685. //#pragma omp parallel for
  686. for (int i = 0; i < cur_block_size_x; ++i){
  687. for (int j = 0; j < cur_block_size_y; ++j){
  688.  
  689.  
  690. int current_node_global_offset_x = GetGlobalX(i),
  691. current_node_global_offset_y = GetGlobalY(j);
  692.  
  693. int glob_x = current_node_global_offset_x,
  694. glob_y = current_node_global_offset_y;
  695.  
  696. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  697.  
  698. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  699.  
  700. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  701.  
  702. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  703.  
  704. }
  705. }
  706.  
  707. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  708.  
  709.  
  710. for (int i = 0; i < cur_block_size_x; ++i){
  711. for (int j = 0; j < cur_block_size_y; ++j){
  712.  
  713. double rho = 1.0;
  714.  
  715. int current_node_global_offset_x = GetGlobalX(i),
  716. current_node_global_offset_y = GetGlobalY(j);
  717.  
  718. int glob_x = current_node_global_offset_x,
  719. glob_y = current_node_global_offset_y;
  720.  
  721. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  722. case 2:
  723. case 4:
  724. case 6:
  725. case 8:
  726. //angle
  727. rho = 0.25;
  728. break;
  729. case 1:
  730. case 3:
  731. case 5:
  732. case 7:
  733. //side
  734. rho = 0.5;
  735. break;
  736. case 0:
  737. //internal
  738. rho = 1.0;
  739. break;
  740. default:
  741. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  742. }
  743.  
  744.  
  745. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  746.  
  747. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  748. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  749. }
  750. }
  751.  
  752. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  753. MPI_COMM_WORLD);
  754.  
  755. //global_diff_norm = sqrt(global_diff_norm);
  756.  
  757.  
  758. return global_diff_norm;
  759. }
  760.  
  761. double GetCurrentNorm(){
  762. return current_norm;
  763. }
  764. };
  765.  
  766.  
  767.  
  768.  
  769. int main(int argc, char* argv[]){
  770.  
  771. const double x_left = -2, x_right = 3;
  772. const double y_bottom = -1, y_top = 4;
  773.  
  774. double time_start, time_stop;
  775. int N, Gx, Gy;
  776. int dim[2], period[2];
  777.  
  778. MPI_Comm comm;
  779.  
  780. //N - global grid size
  781. N = atoi(argv[1]);
  782.  
  783. //Gx
  784. Gx = dim[0] = atoi(argv[2]);
  785.  
  786. //Gy
  787. Gy = dim[1] = atoi(argv[3]);
  788.  
  789. period[0]=0;
  790. period[1]=0;
  791.  
  792.  
  793. MPI_Init(&argc, &argv);
  794.  
  795. time_start = MPI_Wtime();
  796.  
  797. int world_size;
  798.  
  799. int my_rank;
  800.  
  801. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  802. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  803.  
  804.  
  805. if(my_rank == 0){
  806. cout << "[INFO]: N = " << N << endl;
  807. cout << "[INFO]: Gx = " << dim[0] << endl;
  808. cout << "[INFO]: Gy = " << dim[1] << endl;
  809. }
  810.  
  811.  
  812.  
  813. if(argc <= 3){
  814. if(my_rank == 0)
  815. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy>" << endl;
  816.  
  817. MPI_Abort(MPI_COMM_WORLD, 1);
  818. }
  819.  
  820. if(Gx * Gy != world_size){
  821. if(my_rank == 0)
  822. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  823.  
  824. MPI_Abort(MPI_COMM_WORLD, 1);
  825. }
  826.  
  827. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  828.  
  829. /*if(my_rank == 0)
  830. cout << "[INFO]: Cart created"<<endl;
  831. */
  832.  
  833. MPI_Comm_rank(comm, &my_rank);
  834.  
  835. int my_coords[2];
  836.  
  837. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  838.  
  839.  
  840.  
  841.  
  842. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  843.  
  844.  
  845. int iteration_num = 0;
  846.  
  847. bool should_i_stop = false;
  848.  
  849. while ( should_i_stop != true ){
  850.  
  851. w_func.DoIteration(should_i_stop);
  852.  
  853. if ( (my_rank == 0) && (iteration_num % 10000 == 0) ){
  854. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  855. }
  856.  
  857. iteration_num++;
  858. }
  859.  
  860. //MPI_Barrier(MPI_COMM_WORLD);
  861.  
  862. //vector <double> local_elements;
  863.  
  864. double comparing = w_func.CompareWithExact();
  865.  
  866. if (my_rank == 0)
  867. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  868.  
  869.  
  870.  
  871. //myfile.close();
  872.  
  873. time_stop = MPI_Wtime();
  874. if( my_rank == 0 )
  875. cout << "Finished!" << endl
  876. << "Total iterations: " << iteration_num << endl
  877. << "Elapsed time: " << (time_stop - time_start) << endl
  878. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  879.  
  880. MPI_Finalize();
  881.  
  882. return 0;
  883. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement