Advertisement
Guest User

zpragma

a guest
Jan 18th, 2020
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.95 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <math.h>
  4. #include <omp.h>
  5.  
  6. #define SCALAR double
  7.  
  8. #define NGAUSS 6
  9. #define NSHAP 6
  10. #define NGEO 6
  11. #define NDOFS 6
  12.  
  13. #define TEST
  14.  
  15. #define zero 0.0
  16. #define one 1.0
  17. #define two 2.0
  18. #define half 0.5
  19. #define one_fourth 0.25
  20. #define one_sixth (0.166666666666666667)
  21.  
  22. #define weight_linear_prism (one_sixth)
  23.  
  24. omp_lock_t lock;
  25.  
  26. int pdr_num_int_el_QSS_prism(
  27. SCALAR *gauss_dat_host, // integration points data of elements having given p
  28. SCALAR *shape_fun_host, // shape functions on a reference element
  29. SCALAR *el_data_in, // data for integration of NR_ELEMS_THIS_KERCALL elements
  30. SCALAR *el_data_out, // result of integration of NR_ELEMS_THIS_KERCALL elements
  31. const int nr_elem_mic,
  32. const int size_el_out,
  33. const int size_el_in,
  34. const int size_shp,
  35. const int geo_dat_size,
  36. const int nr_coeff,
  37. const int one_el_stiff_mat_size,
  38. const int one_el_load_vec_size);
  39.  
  40. int main()
  41. {
  42.  
  43. omp_init_lock(&lock);
  44. srand(time(NULL));
  45. omp_set_num_threads(2);
  46. FILE *input;
  47.  
  48. #ifdef TEST
  49. FILE *output;
  50. #endif
  51.  
  52. input = fopen("input_TEST_PRISM_NORMAL.txt", "r");
  53.  
  54. int Nr_elems_mic, size_in, size_out, size_shap, geo_dat_size, nr_coeff, one_el_stiff_mat_size, one_el_load_vec_size, i;
  55.  
  56. fscanf(input, "%d", &Nr_elems_mic);
  57. printf("Nr_elems_mic=%d\n", Nr_elems_mic);
  58. fscanf(input, "%d", &size_in);
  59. printf("Size_in=%d\n", size_in);
  60. fscanf(input, "%d", &size_out);
  61. printf("Size_out=%d\n", size_out);
  62. fscanf(input, "%d", &size_shap);
  63. printf("Size_shap=%d\n", size_shap);
  64. fscanf(input, "%d", &geo_dat_size);
  65. printf("geo_dat_size=%d\n", geo_dat_size);
  66. fscanf(input, "%d", &nr_coeff);
  67. printf("nr_coeff=%d\n", nr_coeff);
  68. fscanf(input, "%d", &one_el_stiff_mat_size);
  69. printf("one_el_stiff_mat_size=%d\n", one_el_stiff_mat_size);
  70. fscanf(input, "%d", &one_el_load_vec_size);
  71. printf("one_el_load_vec_size=%d\n", one_el_load_vec_size);
  72. printf("\n\n\n\n");
  73.  
  74. SCALAR *gauss_dat_host = (SCALAR *)malloc(1344 * sizeof(SCALAR));
  75. SCALAR *shape_fun_host;
  76. SCALAR *el_data_in;
  77. SCALAR *el_data_out;
  78. #ifdef TEST
  79. SCALAR *testing;
  80. #endif
  81. shape_fun_host = (SCALAR *)malloc(size_shap * sizeof(SCALAR));
  82. el_data_in = (SCALAR *)malloc(size_in * sizeof(SCALAR));
  83. el_data_out = (SCALAR *)malloc(size_out * sizeof(SCALAR));
  84.  
  85. #ifdef TEST
  86. testing = (SCALAR *)malloc(size_out * sizeof(SCALAR));
  87. #endif
  88.  
  89. for (i = 0; i < 1344; i++)
  90. fscanf(input, "%lf", &gauss_dat_host[i]);
  91. printf("Wczytano punkty Gaussa\n");
  92.  
  93. for (i = 0; i < size_shap; i++)
  94. fscanf(input, "%lf", &shape_fun_host[i]);
  95. printf("Wczytano funkcje ksztaltu\n");
  96.  
  97. for (i = 0; i < size_in; i++)
  98. fscanf(input, "%lf", &el_data_in[i]);
  99. printf("Wczytano parametry poczatkowe\n");
  100.  
  101. fclose(input);
  102. #ifdef TEST
  103.  
  104. output = fopen("output_TEST_PRISM_NORMAL.txt", "r");
  105.  
  106. printf("Otwarcie outputu\n");
  107.  
  108. for (i = 0; i < size_out; i++)
  109. fscanf(output, "%lf ", &testing[i]);
  110. fclose(output);
  111.  
  112. #endif
  113.  
  114. printf("Start!\n");
  115. double start = omp_get_wtime();
  116. pdr_num_int_el_QSS_prism(gauss_dat_host, shape_fun_host,
  117. el_data_in, el_data_out, Nr_elems_mic,
  118. size_out, size_in, size_shap, geo_dat_size, nr_coeff, one_el_stiff_mat_size, one_el_load_vec_size);
  119. printf("Time: %f\n", omp_get_wtime() - start);
  120. printf("Koniec!\n");
  121.  
  122. #ifdef TEST
  123.  
  124. SCALAR delta = 0.00001;
  125. for (i = 0; i < size_out; i++)
  126. {
  127. if (fabs(testing[i] - el_data_out[i]) > delta)
  128. {
  129. printf("Error in %d element->test=%.10lf != out=%.10lf\n", i, testing[i], el_data_out[i]);
  130. exit(0);
  131. }
  132. }
  133. printf("Test OK!\n");
  134. #endif
  135. }
  136.  
  137. int pdr_num_int_el_QSS_prism(
  138. SCALAR *gauss_dat_host, // integration points data of elements having given p
  139. SCALAR *shape_fun_host, // shape functions on a reference element
  140. SCALAR *el_data_in, // data for integration of NR_ELEMS_THIS_KERCALL elements
  141. SCALAR *el_data_out, // result of integration of NR_ELEMS_THIS_KERCALL elements
  142. const int nr_elem_mic,
  143. const int size_el_out,
  144. const int size_el_in,
  145. const int size_shp,
  146. const int geo_dat_size,
  147. const int nr_coeff,
  148. const int one_el_stiff_mat_size,
  149. const int one_el_load_vec_size)
  150. {
  151.  
  152. int ielem;
  153. register int offset;
  154.  
  155. #pragma omp parallel for private(ielem, offset)
  156. for (ielem = 0; ielem < nr_elem_mic; ielem++)
  157. {
  158.  
  159. SCALAR tab_fun_u_derx[NSHAP];
  160. SCALAR tab_fun_u_dery[NSHAP];
  161. SCALAR tab_fun_u_derz[NSHAP];
  162.  
  163. SCALAR stiff_mat[NDOFS * NDOFS];
  164. SCALAR load_vec[NDOFS];
  165.  
  166. int i;
  167. //-------------------------------------------------------------
  168. // ******************* READING INPUT DATA *********************
  169.  
  170. offset = nr_elem_mic * geo_dat_size + ielem * nr_coeff;
  171.  
  172. register SCALAR coeff00 = el_data_in[offset + 0];
  173. register SCALAR coeff01 = el_data_in[offset + 1];
  174. register SCALAR coeff02 = el_data_in[offset + 2];
  175. register SCALAR coeff10 = el_data_in[offset + 3];
  176. register SCALAR coeff11 = el_data_in[offset + 4];
  177. register SCALAR coeff12 = el_data_in[offset + 5];
  178. register SCALAR coeff20 = el_data_in[offset + 6];
  179. register SCALAR coeff21 = el_data_in[offset + 7];
  180. register SCALAR coeff22 = el_data_in[offset + 8];
  181. register SCALAR coeff30 = el_data_in[offset + 9];
  182. register SCALAR coeff31 = el_data_in[offset + 10];
  183. register SCALAR coeff32 = el_data_in[offset + 11];
  184. register SCALAR coeff03 = el_data_in[offset + 12];
  185. register SCALAR coeff13 = el_data_in[offset + 13];
  186. register SCALAR coeff23 = el_data_in[offset + 14];
  187. register SCALAR coeff33 = el_data_in[offset + 15];
  188. register SCALAR coeff04 = el_data_in[offset + 16];
  189. register SCALAR coeff14 = el_data_in[offset + 17];
  190. register SCALAR coeff24 = el_data_in[offset + 18];
  191. register SCALAR coeff34 = el_data_in[offset + 19];
  192.  
  193. // ******* THE END OF: READING INPUT DATA *********************
  194. //-------------------------------------------------------------
  195.  
  196. //-------------------------------------------------------------
  197. //******************** INITIALIZING SM AND LV ******************//
  198.  
  199. #pragma omp parallel for private(i) shared(stiff_mat)
  200. for (i = 0; i < NDOFS * NDOFS; i++)
  201. stiff_mat[i] = zero;
  202.  
  203. #pragma omp parallel for private(i) shared(load_vec)
  204. for (i = 0; i < NDOFS; i++)
  205. load_vec[i] = zero;
  206.  
  207. //******************** END OF: INITIALIZING SM AND LV ******************//
  208. //-------------------------------------------------------------
  209.  
  210. //-------------------------------------------------------------
  211. //************************* LOOP OVER INTEGRATION POINTS ************************//
  212.  
  213. // in a loop over gauss points
  214. int igauss;
  215. int idof, jdof;
  216. #pragma omp parallel private(igauss) shared(idof, jdof, load_vec, stiff_mat)
  217. for (igauss = 0; igauss < NGAUSS; igauss++)
  218. {
  219.  
  220. SCALAR daux = gauss_dat_host[4 * igauss];
  221. SCALAR faux = gauss_dat_host[4 * igauss + 1];
  222. SCALAR eaux = gauss_dat_host[4 * igauss + 2];
  223.  
  224. SCALAR vol = weight_linear_prism; // vol = weight CONSTANT FOR LINEAR PRISM!!!
  225. //-------------------------------------------------------------
  226. //************************* JACOBIAN TERMS CALCULATIONS *************************//
  227.  
  228. SCALAR temp1 = zero;
  229. SCALAR temp2 = zero;
  230. SCALAR temp3 = zero;
  231. SCALAR temp4 = zero;
  232. SCALAR temp5 = zero;
  233. SCALAR temp6 = zero;
  234. SCALAR temp7 = zero;
  235. SCALAR temp8 = zero;
  236. SCALAR temp9 = zero;
  237.  
  238. { // block to indicate the scope of jac_x registers
  239.  
  240. SCALAR jac_0 = zero;
  241. SCALAR jac_1 = zero;
  242. SCALAR jac_2 = zero;
  243. SCALAR jac_3 = zero;
  244. SCALAR jac_4 = zero;
  245. SCALAR jac_5 = zero;
  246. SCALAR jac_6 = zero;
  247. SCALAR jac_7 = zero;
  248. SCALAR jac_8 = zero;
  249.  
  250. { // block to indicate the scope of jac_data
  251.  
  252. SCALAR jac_data[3 * NGEO];
  253.  
  254. jac_data[0] = -(one - eaux) * half;
  255. jac_data[1] = (one - eaux) * half;
  256. jac_data[2] = zero;
  257. jac_data[3] = -(one + eaux) * half;
  258. jac_data[4] = (one + eaux) * half;
  259. jac_data[5] = zero;
  260. jac_data[6] = -(one - eaux) * half;
  261. jac_data[7] = zero;
  262. jac_data[8] = (one - eaux) * half;
  263. jac_data[9] = -(one + eaux) * half;
  264. jac_data[10] = zero;
  265. jac_data[11] = (one + eaux) * half;
  266. jac_data[12] = -(one - daux - faux) * half;
  267. jac_data[13] = -daux * half;
  268. jac_data[14] = -faux * half;
  269. jac_data[15] = (one - daux - faux) * half;
  270. jac_data[16] = daux * half;
  271. jac_data[17] = faux * half;
  272.  
  273. /* Jacobian matrix J */
  274.  
  275. offset = ielem * geo_dat_size;
  276.  
  277. #pragma omp parallel for private(i) shared(jac_data, el_data_in) reduction(+ \
  278. : temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9)
  279. for (i = 0; i < NGEO; i++)
  280. {
  281.  
  282. jac_1 = jac_data[i];
  283. jac_2 = jac_data[NGEO + i];
  284. jac_3 = jac_data[2 * NGEO + i];
  285.  
  286. jac_4 = el_data_in[offset + 3 * i]; //node coor
  287. jac_5 = el_data_in[offset + 3 * i + 1];
  288. jac_6 = el_data_in[offset + 3 * i + 2];
  289.  
  290. //printf("el_data_in_geo_inside[%d]=%lf\n",offset+3*i,el_data_in[offset+3*i]); //ok
  291.  
  292. temp1 += jac_4 * jac_1;
  293. temp2 += jac_4 * jac_2;
  294. temp3 += jac_4 * jac_3;
  295. temp4 += jac_5 * jac_1;
  296. temp5 += jac_5 * jac_2;
  297. temp6 += jac_5 * jac_3;
  298. temp7 += jac_6 * jac_1;
  299. temp8 += jac_6 * jac_2;
  300. temp9 += jac_6 * jac_3;
  301. }
  302.  
  303. } // the end of scope for jac_data
  304.  
  305. jac_0 = (temp5 * temp9 - temp8 * temp6);
  306. jac_1 = (temp8 * temp3 - temp2 * temp9);
  307. jac_2 = (temp2 * temp6 - temp3 * temp5);
  308.  
  309. daux = temp1 * jac_0 + temp4 * jac_1 + temp7 * jac_2;
  310.  
  311. /* Jacobian calculations - |J| and inverse of the Jacobian matrix*/
  312. vol *= daux; // vol = weight * det J
  313.  
  314. faux = one / daux;
  315.  
  316. jac_0 *= faux;
  317. jac_1 *= faux;
  318. jac_2 *= faux;
  319.  
  320. jac_3 = (temp6 * temp7 - temp4 * temp9) * faux;
  321. jac_4 = (temp1 * temp9 - temp7 * temp3) * faux;
  322. jac_5 = (temp3 * temp4 - temp1 * temp6) * faux;
  323.  
  324. jac_6 = (temp4 * temp8 - temp5 * temp7) * faux;
  325. jac_7 = (temp2 * temp7 - temp1 * temp8) * faux;
  326. jac_8 = (temp1 * temp5 - temp2 * temp4) * faux;
  327.  
  328. //************* THE END OF: JACOBIAN TERMS CALCULATIONS *************************//
  329. //-------------------------------------------------------------
  330.  
  331. //-------------------------------------------------------------
  332. //***** SEPARATE COMPUTING OF ALL GLOBAL DERIVATIVES OF ALL SHAPE FUNCTIONS *****//
  333.  
  334. #pragma omp parallel for private(idof, temp1, temp2, temp3) default(shared)
  335. for (idof = 0; idof < NSHAP; idof++)
  336. {
  337.  
  338. // read proper values of shape functions and their derivatives
  339. temp1 = shape_fun_host[igauss * NSHAP + idof];
  340. temp2 = shape_fun_host[NSHAP * NGAUSS + igauss * NSHAP + idof];
  341. temp3 = shape_fun_host[2 * NSHAP * NGAUSS + igauss * NSHAP + idof];
  342.  
  343. tab_fun_u_derx[idof] = temp1 * jac_0 + temp2 * jac_3 + temp3 * jac_6;
  344. tab_fun_u_dery[idof] = temp1 * jac_1 + temp2 * jac_4 + temp3 * jac_7;
  345. tab_fun_u_derz[idof] = temp1 * jac_2 + temp2 * jac_5 + temp3 * jac_8;
  346.  
  347. } // end loop over shape functions for which global derivatives were computed
  348.  
  349. } // the end of block to indicate the scope of jac_x registers
  350.  
  351. //*** THE END OF: SEPARATE COMPUTING OF ALL GLOBAL DERIVATIVES OF ALL SHAPE FUNCTIONS ***//
  352. //-------------------------------------------------------------
  353.  
  354. //-------------------------------------------------------------
  355. //***** SUBSTITUTING ACTUAL COEFFICIENTS FOR SM AND LV CALCULATIONS *****//
  356.  
  357. //-------------------------------------------------------------
  358. //********************* first loop over shape functions ***********************//
  359.  
  360. //offset = ielem * (one_el_stiff_mat_size + one_el_load_vec_size); //loop invariant code motion
  361. #pragma omp parallel for private(idof) shared(igauss) reduction(+ \
  362. : load_vec[:NSHAP])
  363. for (idof = 0; idof < NSHAP; idof++)
  364. {
  365. { // beginning of using registers for u (shp_fun_u, fun_u_der.)
  366.  
  367. //-------------------------------------------------------------
  368. //****** SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ******//
  369.  
  370. SCALAR shp_fun_u = shape_fun_host[3 * NSHAP * NGAUSS + igauss * NSHAP + idof];
  371. SCALAR fun_u_derx = tab_fun_u_derx[idof];
  372. SCALAR fun_u_dery = tab_fun_u_dery[idof];
  373. SCALAR fun_u_derz = tab_fun_u_derz[idof];
  374.  
  375. //*** THE END OF: SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ***//
  376. //-------------------------------------------------------------
  377.  
  378. //-------------------------------------------------------------
  379. //*** ACTUAL INTERMEDIATE CALCULATIONS FOR IDOF SHAPE FUNCTION ***//
  380.  
  381. load_vec[idof] += (
  382.  
  383. coeff04 * fun_u_derx +
  384. coeff14 * fun_u_dery +
  385. coeff24 * fun_u_derz +
  386. coeff34 * shp_fun_u
  387.  
  388. ) *
  389. vol;
  390.  
  391. //*** THE END OF: ACTUAL CALCULATIONS FOR LOAD VECTOR (AND IDOF SHAPE FUNCTION) ***//
  392. //-------------------------------------------------------------
  393.  
  394. } // the end of using registers for u (shp_fun_u, fun_u_der.)
  395.  
  396. //-------------------------------------------------------------
  397. // ************************* second loop over shape functions ****************************//
  398.  
  399. for (jdof = 0; jdof < NSHAP; jdof++)
  400. {
  401. //-------------------------------------------------------------
  402. //****** SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF JDOF SHAPE FUNCTION ******//
  403.  
  404. // read proper values of shape functions and their derivatives
  405. SCALAR shp_fun_v = shape_fun_host[3 * NSHAP * NGAUSS + igauss * NSHAP + jdof];
  406. SCALAR fun_v_derx = tab_fun_u_derx[jdof];
  407. SCALAR fun_v_dery = tab_fun_u_dery[jdof];
  408. SCALAR fun_v_derz = tab_fun_u_derz[jdof];
  409.  
  410. //*** THE END OF: SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ***//
  411. //-------------------------------------------------------------
  412.  
  413. //-------------------------------------------------------------
  414. //********* ACTUAL FINAL CALCULATIONS FOR SM ENTRY *********//
  415.  
  416. stiff_mat[idof * NDOFS + jdof] += (temp4 * fun_v_derx +
  417. temp5 * fun_v_dery +
  418. temp6 * fun_v_derz +
  419. temp7 * shp_fun_v) *
  420. vol;
  421.  
  422. //*** THE END OF: ACTUAL FINAL CALCULATIONS FOR SM ENTRY ***//
  423. //-------------------------------------------------------------
  424.  
  425. } //jdof
  426.  
  427. } //idof
  428.  
  429. } //gauss
  430.  
  431. // // ******** THE END OF: loop over integration points ********//
  432. // //-------------------------------------------------------------
  433.  
  434. offset = ielem * (one_el_stiff_mat_size + one_el_load_vec_size);
  435.  
  436. #pragma omp parallel for private(i) shared(offset, stiff_mat, el_data_out)
  437. for (i = 0; i < NDOFS * NDOFS; i++)
  438. el_data_out[offset + i] = stiff_mat[i];
  439.  
  440. #pragma omp parallel for private(i) shared(offset, load_vec, el_data_out)
  441. for (i = 0; i < NDOFS; i++)
  442. el_data_out[offset + one_el_stiff_mat_size + i] = load_vec[i];
  443.  
  444. } // the end of loop over elements
  445.  
  446. return 1;
  447. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement