manac68974

Untitled

Dec 17th, 2019
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 29.44 KB | None | 0 0
#include <mpi.h>
#include <omp.h>

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
  15.  
  16. class MPIComputations{
  17. int M, N;
  18. int cur_block_size_x, cur_block_size_y, cur_block_global_offset_x, cur_block_global_offset_y;
  19. int Gx, Gy;
  20. int cur_block_global_coors_x, cur_block_global_coors_y;
  21. double h_x, h_y;
  22. int global_block_size_x, global_block_size_y;
  23. int my_rank;
  24. double current_norm;
  25. MPI_Comm comm;
  26. double x_left, x_right, y_bottom, y_top;
  27. ofstream my_file;
  28.  
  29. vector<double> internal_data;
  30. vector<double> old_internal_data;
  31. vector<double> external_data[4];
  32.  
  33. int IsNodeInternalCornerOrSide(int current_node_global_offset_x, int current_node_global_offset_y){
  34.  
  35. //corners
  36. //left bottom corner
  37. if (current_node_global_offset_x == 0 && current_node_global_offset_y == 0){
  38. return 2;
  39. }
  40.  
  41. //left top corner
  42. if (current_node_global_offset_x == 0 && current_node_global_offset_y == N){
  43. return 4;
  44. }
  45.  
  46. //right bottom corner
  47. if (current_node_global_offset_x == M && current_node_global_offset_y == 0){
  48. return 6;
  49. }
  50.  
  51. //right top corner
  52. if (current_node_global_offset_x == M && current_node_global_offset_y == N){
  53. return 8;
  54. }
  55.  
  56. //sides
  57. //left side
  58. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  59. current_node_global_offset_x == 0){
  60. return 1;
  61. }
  62.  
  63. //right side
  64. if (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1 &&
  65. current_node_global_offset_x == M){
  66. return 3;
  67. }
  68.  
  69. //bottom side
  70. if (current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  71. current_node_global_offset_y == 0){
  72. return 5;
  73. }
  74.  
  75. //top side
  76. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1 &&
  77. current_node_global_offset_y == N)){
  78. return 7;
  79. }
  80.  
  81. //internal
  82. if ((current_node_global_offset_x >= 1 && current_node_global_offset_x <= M - 1) &&
  83. (current_node_global_offset_y >= 1 && current_node_global_offset_y <= N - 1)){
  84. return 0;
  85. }
  86.  
  87. return -1;
  88. }
  89.  
  90. double k(double x, double y) {
  91. return 1 + pow(x + y, 2);
  92. }
  93.  
  94. double q(double x, double y) {
  95. return 1;
  96. }
  97.  
  98. double u(double x, double y) {
  99. return 2.0 / (1 + pow(x, 2) + pow(y, 2));
  100. }
  101.  
  102. // psi_R(x, y) = k(A2, y) * du/dx(A2, y)
  103. double psi_R(double y) {
  104. return (-12) * (pow((y + 3), 2) + 1) / pow((pow(y, 2) + 10), 2);
  105. }
  106.  
  107. // psi_L(x, y) = -k(A1, y) * du/dx(A1, y)
  108. double psi_L(double y) {
  109. return (-8) * (pow((y - 2), 2) + 1) / pow((pow(y, 2) + 5), 2);
  110. }
  111.  
  112. // psi_T(x, y) = k(x, B2) * du/dy(x, B2)
  113. double psi_T(double x) {
  114. return (-16) * (pow((x + 4), 2) + 1) / pow((pow(x, 2) + 17), 2);
  115. }
  116.  
  117. // psi_B(x, y) = -k(x, B1) * du/dy(x, B1)
  118. double psi_B(double x) {
  119. return (-4) * (pow((x - 1), 2) + 1) / pow((pow(x, 2) + 2), 2);
  120. }
  121.  
  122. // right-part function of Poisson equation
  123. double F(double x, double y) {
  124. return 2 * (pow(x,4) + pow(y,4) + 2 * (pow(x,2) + 3) * pow(y,2) + 6 * pow(x,2) + 16*x*y + 5)
  125. / pow((1 + pow(x, 2) + pow(y, 2)), 3);
  126. }
  127.  
  128. //inner_product(A[i], internal_data)
  129. double ComputeMagicInnerProductA_iw (int current_node_global_offset_x, int current_node_global_offset_y){
  130.  
  131. int glob_x = current_node_global_offset_x;
  132. int glob_y = current_node_global_offset_y;
  133.  
  134. double result = 0.0;
  135.  
  136. map <string,bool> neighbours = {
  137. {"left", true},
  138. {"right", true},
  139. {"bottom", true},
  140. {"top", true}
  141. };
  142.  
  143. double left_neighbour = 0.0, right_neighbour = 0.0, bottom_neighbour = 0.0, top_neighbour = 0.0, this_node = 0.0;
  144. double left_coeff = 1.0, right_coeff = 1.0, bottom_coeff = 1.0, top_coeff = 1.0, this_coeff = 1.0;
  145.  
  146. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  147. case 2:
  148. //left bottom corner
  149. neighbours["left"] = false;
  150. neighbours["bottom"] = false;
  151. break;
  152. case 4:
  153. //left top corner
  154. neighbours["left"] = false;
  155. neighbours["top"] = false;
  156. break;
  157. case 6:
  158. //right bottom corner
  159. neighbours["right"] = false;
  160. neighbours["bottom"] = false;
  161. break;
  162. case 8:
  163. //right top corner
  164. neighbours["right"] = false;
  165. neighbours["top"] = false;
  166. break;
  167. case 1:
  168. //left side
  169. neighbours["left"] = false;
  170. break;
  171. case 3:
  172. //right side
  173. neighbours["right"] = false;
  174. break;
  175. case 5:
  176. //bottom side
  177. neighbours["bottom"] = false;
  178. break;
  179. case 7:
  180. //top side
  181. neighbours["top"] = false;
  182. break;
  183. case 0:
  184. //internal
  185. break;
  186. default:
  187. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y<<endl;
  188. }
  189.  
  190. if (!neighbours["left"]){
  191. right_coeff = 2.0;
  192. left_coeff = 0.0;
  193. }
  194.  
  195. if (!neighbours["right"]){
  196. left_coeff = 2.0;
  197. right_coeff = 0.0;
  198. }
  199.  
  200. if (!neighbours["bottom"]){
  201. top_coeff = 2.0;
  202. bottom_coeff = 0.0;
  203. }
  204.  
  205. if (!neighbours["top"]){
  206. bottom_coeff = 2.0;
  207. top_coeff = 0.0;
  208. }
  209.  
  210.  
  211.  
  212. if (neighbours["left"]){
  213. left_coeff *= -k(x_left + (glob_x - 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  214. left_neighbour = Get(glob_x - 1, glob_y);
  215. }
  216.  
  217. if (neighbours["right"]){
  218. right_coeff *= -k(x_left + (glob_x + 0.5) * h_x, y_bottom + glob_y * h_y) / pow(h_x, 2);
  219. right_neighbour = Get(glob_x + 1, glob_y);
  220. }
  221.  
  222. if (neighbours["bottom"]){
  223. bottom_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y - 0.5) * h_y) / pow(h_y, 2);
  224. bottom_neighbour = Get(glob_x, glob_y - 1);
  225. }
  226.  
  227. if (neighbours["top"]){
  228. top_coeff *= -k(x_left + glob_x * h_x, y_bottom + (glob_y + 0.5) * h_y) / pow(h_y, 2);
  229. top_neighbour = Get(glob_x, glob_y + 1);
  230. }
  231.  
  232. this_coeff = q(x_left + glob_x * h_x, y_bottom + glob_y * h_y) - left_coeff - right_coeff - bottom_coeff - top_coeff;
  233. this_node = Get(glob_x, glob_y);
  234.  
  235. result = left_coeff * left_neighbour +
  236. right_coeff * right_neighbour +
  237. bottom_coeff * bottom_neighbour +
  238. top_coeff * top_neighbour +
  239. this_coeff * this_node;
  240.  
  241. return result;
  242. }
  243.  
  244. double GetNodeFromB(int current_node_global_offset_x, int current_node_global_offset_y) {
  245.  
  246. int glob_x = current_node_global_offset_x;
  247. int glob_y = current_node_global_offset_y;
  248.  
  249. double result = 0.0;
  250.  
  251. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  252. case 2:
  253. //left bottom corner
  254. result = F(x_left, y_bottom) + 2.0 / h_x * psi_L(y_bottom) + 2.0 / h_y * psi_B(x_left);
  255. break;
  256. case 4:
  257. //left top corner
  258. result = F(x_left, y_top) + 2.0 / h_x * psi_L(y_top) + 2.0 / h_y * psi_T(x_left);
  259. break;
  260. case 6:
  261. //right bottom corner
  262. result = F(x_right, y_bottom) + 2.0 / h_x * psi_R(y_bottom) + 2.0 / h_y * psi_B(x_right);
  263. break;
  264. case 8:
  265. //right top corner
  266. result = F(x_right, y_top) + 2.0 / h_x * psi_R(y_top) + 2.0 / h_y * psi_T(x_right);
  267. break;
  268. case 1:
  269. //left side
  270. result = F(x_left, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_L(y_bottom + glob_y * h_y);
  271. break;
  272. case 3:
  273. //right side
  274. result = F(x_right, y_bottom + glob_y * h_y) + 2.0 / h_x * psi_R(y_bottom + glob_y * h_y);
  275. break;
  276. case 5:
  277. //bottom side
  278. result = F(x_left + glob_x * h_x, y_bottom) + 2.0 / h_y * psi_B(x_left + glob_x * h_x);
  279. break;
  280. case 7:
  281. //top side
  282. result = F(x_left + glob_x * h_x, y_top) + 2.0 / h_y * psi_T(x_left + glob_x * h_x);
  283. break;
  284. case 0:
  285. //internal
  286. result = F(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  287. break;
  288. default:
  289. cout << "[ERROR]: Bad global coords compute matrix. Global:" << glob_x << " " << glob_y <<endl;
  290.  
  291. }
  292.  
  293. return result;
  294.  
  295. }
  296.  
  297. double GetNodeFromExact(int current_node_global_offset_x, int current_node_global_offset_y) {
  298.  
  299. int glob_x = current_node_global_offset_x;
  300. int glob_y = current_node_global_offset_y;
  301.  
  302. return u(x_left + glob_x * h_x, y_bottom + glob_y * h_y);
  303.  
  304. }
  305.  
  306. void ComputeMatrixR(){
  307. /*if(my_rank == 0)
  308. cout << "[INFO]: Computation of matrix r started"<<endl;
  309. */
  310.  
  311. vector<double> r_tmp_matrix (cur_block_size_x*cur_block_size_y, 0.0);
  312.  
  313. //#pragma omp parallel for
  314. for(int i = 0; i < cur_block_size_x; ++i){
  315. for(int j = 0; j < cur_block_size_y; ++j){
  316.  
  317. int current_node_global_offset_x = GetGlobalX(i),
  318. current_node_global_offset_y = GetGlobalY(j);
  319.  
  320. int glob_x = current_node_global_offset_x,
  321. glob_y = current_node_global_offset_y;
  322.  
  323. r_tmp_matrix [ j + cur_block_size_y*i ] = ComputeMagicInnerProductA_iw(glob_x,glob_y) - GetNodeFromB(glob_x, glob_y);
  324.  
  325. }
  326. }
  327.  
  328. //#pragma omp parallel for
  329. for(int i = 0; i < cur_block_size_x; ++i){
  330. for(int j = 0; j < cur_block_size_y; ++j){
  331.  
  332. old_internal_data[ j + cur_block_size_y*i ] = internal_data[ j + cur_block_size_y*i];
  333. internal_data[ j + cur_block_size_y*i ] = r_tmp_matrix[ j + cur_block_size_y*i];
  334.  
  335. }
  336. }
  337.  
  338. //old_internal_data = internal_data;
  339. //internal_data = r_tmp_matrix;
  340.  
  341. SyncMPI();
  342.  
  343. }
  344.  
  345. double ComputeTauAndStopCase(bool &should_i_stop){
  346.  
  347. double local_Ar_r_inner_product_sum = 0.0;
  348. double local_Ar_Ar_inner_product_sum = 0.0;
  349. double global_Ar_r_inner_product_sum = 0.0;
  350. double global_Ar_Ar_inner_product_sum = 0.0;
  351. double local_r_norm = 0.0;
  352. double global_r_norm = 0.0;
  353.  
  354. //#pragma omp parallel for
  355. for(int i = 0; i < cur_block_size_x; ++i){
  356. for(int j = 0; j < cur_block_size_y; ++j){
  357. double rho = 1.0;
  358.  
  359. int current_node_global_offset_x = GetGlobalX(i),
  360. current_node_global_offset_y = GetGlobalY(j);
  361.  
  362. int glob_x = current_node_global_offset_x,
  363. glob_y = current_node_global_offset_y;
  364.  
  365. double tmp_Ar_i_j = ComputeMagicInnerProductA_iw(glob_x, glob_y);
  366.  
  367. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  368. case 2:
  369. case 4:
  370. case 6:
  371. case 8:
  372. //angle
  373. rho = 0.25;
  374. break;
  375. case 1:
  376. case 3:
  377. case 5:
  378. case 7:
  379. //side
  380. rho = 0.5;
  381. break;
  382. case 0:
  383. //internal
  384. rho = 1.0;
  385. break;
  386. default:
  387. cout << "[ERROR]: Bad global coords compute tau. Global:" << glob_x << " " << glob_y << endl;
  388. }
  389.  
  390. double tmp_cur_node_value = Get(glob_x, glob_y);
  391.  
  392. local_Ar_r_inner_product_sum += rho * tmp_Ar_i_j * tmp_cur_node_value * h_x*h_y;
  393. local_Ar_Ar_inner_product_sum += rho * pow (tmp_Ar_i_j, 2) * h_x*h_y;
  394. local_r_norm += rho * pow(tmp_cur_node_value, 2) * h_x*h_y;
  395.  
  396. }
  397. }
  398.  
  399. //cout << "[DEBUG]: Local"<< local_Ar_r_inner_product_sum << endl;
  400.  
  401. MPI_Allreduce(&local_Ar_r_inner_product_sum, &global_Ar_r_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  402. comm);
  403.  
  404. //cout << "[DEBUG]: "<< global_Ar_r_inner_product_sum << endl;
  405.  
  406. MPI_Allreduce(&local_Ar_Ar_inner_product_sum, &global_Ar_Ar_inner_product_sum, 1, MPI_DOUBLE, MPI_SUM,
  407. MPI_COMM_WORLD);
  408.  
  409. //cout << "[DEBUG]: "<< global_Ar_Ar_inner_product_sum << endl;
  410.  
  411. double global_tau = global_Ar_r_inner_product_sum/ global_Ar_Ar_inner_product_sum;
  412.  
  413. MPI_Allreduce(&local_r_norm, &global_r_norm, 1, MPI_DOUBLE, MPI_SUM,
  414. MPI_COMM_WORLD);
  415.  
  416. double eps = 1e-06;
  417.  
  418. if (global_r_norm < 0){
  419. cout << "[ERROR]: bad global r norm" << endl;
  420. }
  421.  
  422. current_norm = fabs(global_tau)*sqrt(global_r_norm);
  423.  
  424. //if (my_rank == 0)
  425. // cout << "[DEBUG]: solution norm "<< tmp_norm << endl;
  426.  
  427. if (current_norm <= eps){
  428. if (my_rank == 0)
  429. cout << "[INFO]: r norm is " << sqrt(global_r_norm) << endl;
  430. should_i_stop = true;
  431. }else{
  432. should_i_stop = false;
  433. }
  434.  
  435. return global_tau;
  436.  
  437. }
  438.  
  439.  
  440. void ComputeNewW(double tau){
  441. //#pragma omp parallel for
  442. for(int i = 0; i < cur_block_size_x; ++i){
  443. for(int j = 0; j < cur_block_size_y; ++j){
  444. internal_data[ j + cur_block_size_y*i ] = old_internal_data[ j + cur_block_size_y*i ] - tau * internal_data[ j + cur_block_size_y*i ];
  445. //old_internal_data[ j + cur_block_size_y*i ] = 0.0;
  446. }
  447. }
  448. }
  449.  
  450. int GetGlobalX(int i){
  451. return cur_block_global_offset_x + i;
  452. }
  453.  
  454. int GetGlobalY(int j){
  455. return cur_block_global_offset_y + j;
  456. }
  457.  
  458. public:
  459. MPIComputations(int inpM, int inpN, int inpGx, int inpGy, int inpx_left, int inpx_right, int inpy_bottom, int inpy_top, int inp_cur_block_global_coors_x, int inp_cur_block_global_coors_y, int inprank, MPI_Comm inpcomm){
  460.  
  461. M = inpM;
  462. N = inpN;
  463.  
  464. Gx = inpGx;
  465. Gy = inpGy;
  466.  
  467. x_left = inpx_left;
  468. x_right = inpx_right;
  469. y_bottom = inpy_bottom;
  470. y_top = inpy_top;
  471.  
  472. h_x = double((x_right - x_left)) / M;
  473. h_y = double((y_top - y_bottom)) / N;
  474.  
  475. my_rank = inprank;
  476. comm = inpcomm;
  477.  
  478. ofstream myfile;
  479. stringstream ss;
  480. ss<< "./file" << inp_cur_block_global_coors_x << "_" << inp_cur_block_global_coors_y << ".res";
  481.  
  482. string filename;
  483.  
  484. ss >> filename;
  485.  
  486. my_file.open (filename);
  487.  
  488.  
  489. cur_block_global_coors_x = inp_cur_block_global_coors_x;
  490. cur_block_global_coors_y = inp_cur_block_global_coors_y;
  491.  
  492. global_block_size_x = (M + 1) / Gx;
  493. global_block_size_y = (N + 1) / Gy;
  494.  
  495. cur_block_size_x = global_block_size_x;
  496. cur_block_size_y = global_block_size_y;
  497.  
  498. cur_block_global_offset_x = global_block_size_x * cur_block_global_coors_x;
  499. cur_block_global_offset_y = global_block_size_y * cur_block_global_coors_y;
  500.  
  501. //cout << "I am "<< cur_block_global_coors_x<< " " << cur_block_global_coors_y << " My offset is "<< cur_block_global_offset_x << " " << cur_block_global_offset_y << endl;
  502.  
  503. //if (my_rank)
  504. // cout << "Global Block size " << global_block_size_x << " " global_block_size_y << " Klein:" << (M + 1) % Gx << " " << (N + 1) % Gy << endl;
  505.  
  506.  
  507. //cout << "This is " << cur_block_global_coors_x<< " " << cur_block_global_coors_y << "My xs are" << cur_block_global_offset_x << global_block_size_x << cur_block_global_offset_x + 2 * global_block_size_x << M + 1 << ((cur_block_global_offset_x + 2 * global_block_size_x) > (M + 1)) << endl;
  508. if ((cur_block_global_offset_x + global_block_size_x + Gx) > (M + 1)){
  509. //cout << "I am "<< cur_block_global_coors_x<< " " << cur_block_global_coors_y << ". My cur xblock size is too big" << endl;
  510. cur_block_size_x += (M + 1) % Gx;
  511. }
  512.  
  513. if ((cur_block_global_offset_y + global_block_size_y + Gy) > (N + 1)){
  514. //cout << "I am "<< cur_block_global_coors_x<< " " << cur_block_global_coors_y << ". My cur yblock size is too big" << endl;
  515. cur_block_size_y += (N + 1) % Gy;
  516. }
  517.  
  518. //cout << "I am "<< cur_block_global_coors_x<< " " << cur_block_global_coors_y << ". My cur block size is " << cur_block_size_x << " " << cur_block_size_y << endl;
  519.  
  520. internal_data.resize(cur_block_size_x * cur_block_size_y);
  521. old_internal_data.resize(cur_block_size_x * cur_block_size_y);
  522.  
  523.  
  524. //OX
  525. external_data[0].resize(cur_block_size_y);
  526. external_data[1].resize(cur_block_size_y);
  527.  
  528. //OY
  529. external_data[2].resize(cur_block_size_x);
  530. external_data[3].resize(cur_block_size_x);
  531.  
  532. }
  533.  
  534. double Get(int i, int j) {
  535. return GetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y);
  536. }
  537.  
  538. void Set(int i, int j, double v) {
  539. return SetLocalIndex(i - cur_block_global_offset_x, j - cur_block_global_offset_y, v);
  540. }
  541.  
  542. void SyncMPI(){
  543.  
  544. /*if(my_rank == 0)
  545. cout << "[INFO]: Sync started"<< endl;
  546. */
  547.  
  548. MPI_Barrier(MPI_COMM_WORLD);
  549.  
  550. //left and right sides
  551. //#pragma omp parallel for
  552. for(int j = 0; j < cur_block_size_y; ++j){
  553.  
  554. external_data[ 0 ][ j ] = GetLocalIndex(0, j);//internal_data[ j ];
  555. external_data[ 1 ][ j ] = GetLocalIndex(cur_block_size_x - 1, j);//internal_data[ j + cur_block_size_y * (cur_block_size_x - 1) ];
  556.  
  557. }
  558.  
  559. //bottom and top sides
  560. //#pragma omp parallel for
  561. for(int i = 0; i < cur_block_size_x; ++i){
  562.  
  563. external_data[ 2 ][ i ] = GetLocalIndex(i, 0);//internal_data[ cur_block_size_y*i ];
  564. external_data[ 3 ][ i ] = GetLocalIndex(i, cur_block_size_y - 1); //internal_data[ (cur_block_size_y - 1) + cur_block_size_y*i ];
  565.  
  566. }
  567.  
  568. int my_coords[2];
  569. int targets_ranks[4];
  570.  
  571. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  572.  
  573. int neighbour_offsets[ 4 ][ 2 ] = {
  574. { -1, 0 },{ 1, 0 },
  575. { 0, -1 },{ 0, 1 }
  576. };
  577.  
  578. //#pragma omp parallel for
  579. for(int i = 0; i < 4; i++){
  580.  
  581. int target_coords[2];
  582.  
  583. target_coords[ 0 ] = my_coords[ 0 ] + neighbour_offsets[ i ][ 0 ];
  584. target_coords[ 1 ] = my_coords[ 1 ] + neighbour_offsets[ i ][ 1 ];
  585.  
  586. if (target_coords[0] >= 0 && target_coords[0] < Gx && target_coords[1] >= 0 && target_coords[1] < Gy){
  587.  
  588. MPI_Cart_rank(comm, target_coords, &targets_ranks[ i ]);
  589.  
  590. }
  591. else{
  592. targets_ranks[i] = -1;
  593. }
  594. }
  595.  
  596. //Now we have rank for all targets
  597.  
  598. for(int axis = 0; axis < 2; axis++){
  599.  
  600. int parity_bit = (my_coords[ axis ]) % 2;
  601.  
  602. //if parity_bit == 0 then
  603. // zuerst mit links, dann mit rechts tauschen
  604. //elif parity_bit == 1:
  605. // zuerst mit rechts, dann mit links tauschen
  606.  
  607. for(int tmp = 0; tmp < 2; tmp++){
  608. parity_bit = 1 - parity_bit;
  609.  
  610. //target id in external_data und targets_ranks
  611. int target_idx = 2 * axis + (1 - parity_bit);
  612.  
  613. if (targets_ranks[target_idx] != -1){
  614.  
  615. // вычисляем теги отправки и приема
  616. // в них зашиты номер ноды, ось, направление
  617. int send_tag = 100000 + my_rank * 100 + axis * 10 + parity_bit;
  618. int recv_tag = 100000 + targets_ranks[ target_idx ] * 100 + axis * 10 + (1-parity_bit);
  619.  
  620. MPI_Status tmp_status;
  621. // если отправка не на себя, то отправляем
  622. if(my_rank != targets_ranks[ target_idx ]){
  623.  
  624. MPI_Sendrecv_replace(&external_data[ target_idx ][ 0 ], external_data[ target_idx ].size(),
  625. MPI_DOUBLE, targets_ranks[ target_idx ], send_tag, targets_ranks[ target_idx ], recv_tag,
  626. comm, &tmp_status);
  627.  
  628. }
  629. }
  630. }
  631. }
  632.  
  633. MPI_Barrier(MPI_COMM_WORLD);
  634.  
  635. }
  636.  
  637. void DoIteration(bool &should_i_stop){
  638.  
  639. ComputeMatrixR();
  640.  
  641. //Now R Matrix is in internal_data
  642.  
  643. double tau = ComputeTauAndStopCase(should_i_stop);
  644.  
  645. //We have in our block jetzt:
  646. //in internal_data: R Matrix
  647. //in old_internal_data: w aus letzte iteration
  648. //and we have tau
  649. //jetzt koennen wir naechste w finden
  650.  
  651. ComputeNewW(tau);
  652.  
  653. SyncMPI();
  654.  
  655. }
  656.  
  657. double GetLocalIndex(int i, int j){
  658. //internal data
  659. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  660. return internal_data[ j + cur_block_size_y*i ];
  661. }
  662.  
  663. //external data
  664. //OX
  665. if((j >= 0) && (j < cur_block_size_y)){
  666.  
  667. if (i == -1)
  668. return external_data[ 0 ][ j ];
  669.  
  670. if (i == cur_block_size_x)
  671. return external_data[ 1 ][ j ];
  672.  
  673. }
  674.  
  675. //OY
  676. if((i >= 0) && (i < cur_block_size_x)){
  677.  
  678. if (j == -1)
  679. return external_data[ 2 ][ i ];
  680. if (j == cur_block_size_y)
  681. return external_data[ 3 ][ i ];
  682.  
  683. }
  684.  
  685. cout << "[ERROR]: bad local index" << endl;
  686.  
  687. return nan("");
  688. }
  689.  
  690. void SetLocalIndex(int i, int j, double v){
  691. if ((j >= 0) && (j < cur_block_size_y) && (i >= 0) && (i < cur_block_size_x)){
  692. internal_data[ j + cur_block_size_y*i ] = v;
  693. }else{
  694. cout << "[ERROR]: trying to set data outside the local area" << endl;
  695. }
  696. }
  697.  
  698. double CompareWithExact(bool print_data_to_files) {
  699.  
  700. double local_diff_norm = 0.0;
  701. double global_diff_norm = 0.0;
  702. /*
  703. if (my_rank == 0)
  704. cout << "[INFO]: Starting computing compare with exact" << endl;
  705. */
  706. vector <double> tmp_elements(cur_block_size_x * cur_block_size_y, 0.0);
  707.  
  708. //#pragma omp parallel for
  709. for (int i = 0; i < cur_block_size_x; ++i){
  710. for (int j = 0; j < cur_block_size_y; ++j){
  711.  
  712.  
  713. int current_node_global_offset_x = GetGlobalX(i),
  714. current_node_global_offset_y = GetGlobalY(j);
  715.  
  716. int glob_x = current_node_global_offset_x,
  717. glob_y = current_node_global_offset_y;
  718.  
  719. double tmp_elem = Get(glob_x, glob_y) - GetNodeFromExact(glob_x, glob_y);
  720.  
  721. //local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  722.  
  723. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  724.  
  725. tmp_elements [j + cur_block_size_y*i] = tmp_elem;
  726.  
  727. }
  728. }
  729.  
  730. //cout << "[INFO]: local max diff in " << cur_block_global_offset_x << " " << cur_block_global_offset_y << " " << local_diff_norm << endl;
  731.  
  732.  
  733. for (int i = 0; i < cur_block_size_x; ++i){
  734. for (int j = 0; j < cur_block_size_y; ++j){
  735.  
  736. double rho = 1.0;
  737.  
  738. int current_node_global_offset_x = GetGlobalX(i),
  739. current_node_global_offset_y = GetGlobalY(j);
  740.  
  741. int glob_x = current_node_global_offset_x,
  742. glob_y = current_node_global_offset_y;
  743.  
  744. switch (IsNodeInternalCornerOrSide(glob_x, glob_y)){
  745. case 2:
  746. case 4:
  747. case 6:
  748. case 8:
  749. //angle
  750. rho = 0.25;
  751. break;
  752. case 1:
  753. case 3:
  754. case 5:
  755. case 7:
  756. //side
  757. rho = 0.5;
  758. break;
  759. case 0:
  760. //internal
  761. rho = 1.0;
  762. break;
  763. default:
  764. cout << "[ERROR]: Bad global coords compute exact. Local:" << i << " " << j << ". Global:" << glob_x << " " << glob_y <<endl;
  765. }
  766.  
  767.  
  768. double tmp_elem = tmp_elements [j + cur_block_size_y*i];
  769.  
  770. local_diff_norm = max( fabs(tmp_elem), local_diff_norm);
  771. //local_diff_norm += rho * pow(tmp_elem, 2) * h_x * h_y;
  772. }
  773. }
  774.  
  775. MPI_Allreduce(&local_diff_norm, &global_diff_norm, 1, MPI_DOUBLE, MPI_MAX,
  776. MPI_COMM_WORLD);
  777.  
  778. //global_diff_norm = sqrt(global_diff_norm);
  779.  
  780.  
  781.  
  782.  
  783.  
  784. if (print_data_to_files){
  785. for (int i = 0; i < cur_block_size_x; ++i){
  786. for (int j = 0; j < cur_block_size_y; ++j){
  787.  
  788. int current_node_global_offset_x = GetGlobalX(i),
  789. current_node_global_offset_y = GetGlobalY(j);
  790.  
  791. int glob_x = current_node_global_offset_x,
  792. glob_y = current_node_global_offset_y;
  793.  
  794. my_file << Get(glob_x, glob_y) << ":" << GetNodeFromExact(glob_x, glob_y) << "\t";
  795. }
  796. my_file << endl;
  797. }
  798. }
  799.  
  800. return global_diff_norm;
  801. }
  802.  
  803. double GetCurrentNorm(){
  804. return current_norm;
  805. }
  806. };
  807.  
  808.  
  809.  
  810.  
  811. int main(int argc, char* argv[]){
  812.  
  813. const double x_left = -2, x_right = 3;
  814. const double y_bottom = -1, y_top = 4;
  815.  
  816. double time_start, time_stop;
  817. int N, Gx, Gy;
  818. int dim[2], period[2];
  819.  
  820. MPI_Comm comm;
  821.  
  822. //N - global grid size
  823. N = atoi(argv[1]);
  824.  
  825. //Gx
  826. Gx = dim[0] = atoi(argv[2]);
  827.  
  828. //Gy
  829. Gy = dim[1] = atoi(argv[3]);
  830.  
  831. bool print_data_to_files = false;
  832.  
  833. if (argc == 4){
  834. print_data_to_files = true;
  835. }
  836. period[0]=0;
  837. period[1]=0;
  838.  
  839.  
  840. MPI_Init(&argc, &argv);
  841.  
  842. time_start = MPI_Wtime();
  843.  
  844. int world_size;
  845.  
  846. int my_rank;
  847.  
  848. MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  849. MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  850.  
  851.  
  852. if(my_rank == 0){
  853. cout << "[INFO]: N = " << N << endl;
  854. cout << "[INFO]: Gx = " << dim[0] << endl;
  855. cout << "[INFO]: Gy = " << dim[1] << endl;
  856. }
  857.  
  858.  
  859.  
  860. if(argc <= 3){
  861. if(my_rank == 0)
  862. cout << "[ERROR]: Usage: mpieval <N> <Gx> <Gy> [<print_data_to_files>]" << endl;
  863.  
  864. MPI_Abort(MPI_COMM_WORLD, 1);
  865. }
  866.  
  867. if(Gx * Gy != world_size){
  868. if(my_rank == 0)
  869. cout << "[ERROR]: mpi world size is not equal to "<< Gx << "*" << Gy << endl;
  870.  
  871. MPI_Abort(MPI_COMM_WORLD, 1);
  872. }
  873.  
  874. MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, 1, &comm);
  875.  
  876. /*if(my_rank == 0)
  877. cout << "[INFO]: Cart created"<<endl;
  878. */
  879.  
  880. MPI_Comm_rank(comm, &my_rank);
  881.  
  882. int my_coords[2];
  883.  
  884. MPI_Cart_coords(comm, my_rank, 2, my_coords);
  885.  
  886.  
  887.  
  888.  
  889. class MPIComputations w_func(N, N, Gx, Gy, x_left, x_right, y_bottom, y_top, my_coords[0], my_coords[1], my_rank, comm);
  890.  
  891.  
  892. int iteration_num = 0;
  893.  
  894. bool should_i_stop = false;
  895.  
  896. while ( should_i_stop != true ){
  897.  
  898. w_func.DoIteration(should_i_stop);
  899.  
  900. if ( (my_rank == 0) && (iteration_num % 10000 == 0) ){
  901. cout << "[INFO]: Iteration " << iteration_num << " difference with previous is: "<< w_func.GetCurrentNorm() << endl;
  902. }
  903.  
  904. iteration_num++;
  905. }
  906.  
  907. MPI_Barrier(MPI_COMM_WORLD);
  908.  
  909. //vector <double> local_elements;
  910.  
  911. double comparing = w_func.CompareWithExact(print_data_to_files);
  912.  
  913. if (my_rank == 0)
  914. cout << "[INFO]: Diff with exact solution: " << comparing << endl;
  915.  
  916.  
  917.  
  918. //myfile.close();
  919.  
  920. time_stop = MPI_Wtime();
  921. if( my_rank == 0 )
  922. cout << "Finished!" << endl
  923. << "Total iterations: " << iteration_num << endl
  924. << "Elapsed time: " << (time_stop - time_start) << endl
  925. << "Time per iteration: " << (time_stop - time_start) / double(iteration_num) << endl;
  926.  
  927. MPI_Finalize();
  928.  
  929. return 0;
  930. }
Add Comment
Please, Sign In to add comment