Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <math.h>
- #include <omp.h>
- #define SCALAR double
- #define NGAUSS 6
- #define NSHAP 6
- #define NGEO 6
- #define NDOFS 6
- #define TEST
- #define zero 0.0
- #define one 1.0
- #define two 2.0
- #define half 0.5
- #define one_fourth 0.25
- #define one_sixth (0.166666666666666667)
- #define weight_linear_prism (one_sixth)
- omp_lock_t lock;
- int pdr_num_int_el_QSS_prism(
- SCALAR *gauss_dat_host, // integration points data of elements having given p
- SCALAR *shape_fun_host, // shape functions on a reference element
- SCALAR *el_data_in, // data for integration of NR_ELEMS_THIS_KERCALL elements
- SCALAR *el_data_out, // result of integration of NR_ELEMS_THIS_KERCALL elements
- const int nr_elem_mic,
- const int size_el_out,
- const int size_el_in,
- const int size_shp,
- const int geo_dat_size,
- const int nr_coeff,
- const int one_el_stiff_mat_size,
- const int one_el_load_vec_size);
- int main()
- {
- omp_init_lock(&lock);
- srand(time(NULL));
- omp_set_num_threads(2);
- FILE *input;
- #ifdef TEST
- FILE *output;
- #endif
- input = fopen("input_TEST_PRISM_NORMAL.txt", "r");
- int Nr_elems_mic, size_in, size_out, size_shap, geo_dat_size, nr_coeff, one_el_stiff_mat_size, one_el_load_vec_size, i;
- fscanf(input, "%d", &Nr_elems_mic);
- printf("Nr_elems_mic=%d\n", Nr_elems_mic);
- fscanf(input, "%d", &size_in);
- printf("Size_in=%d\n", size_in);
- fscanf(input, "%d", &size_out);
- printf("Size_out=%d\n", size_out);
- fscanf(input, "%d", &size_shap);
- printf("Size_shap=%d\n", size_shap);
- fscanf(input, "%d", &geo_dat_size);
- printf("geo_dat_size=%d\n", geo_dat_size);
- fscanf(input, "%d", &nr_coeff);
- printf("nr_coeff=%d\n", nr_coeff);
- fscanf(input, "%d", &one_el_stiff_mat_size);
- printf("one_el_stiff_mat_size=%d\n", one_el_stiff_mat_size);
- fscanf(input, "%d", &one_el_load_vec_size);
- printf("one_el_load_vec_size=%d\n", one_el_load_vec_size);
- printf("\n\n\n\n");
- SCALAR *gauss_dat_host = (SCALAR *)malloc(1344 * sizeof(SCALAR));
- SCALAR *shape_fun_host;
- SCALAR *el_data_in;
- SCALAR *el_data_out;
- #ifdef TEST
- SCALAR *testing;
- #endif
- shape_fun_host = (SCALAR *)malloc(size_shap * sizeof(SCALAR));
- el_data_in = (SCALAR *)malloc(size_in * sizeof(SCALAR));
- el_data_out = (SCALAR *)malloc(size_out * sizeof(SCALAR));
- #ifdef TEST
- testing = (SCALAR *)malloc(size_out * sizeof(SCALAR));
- #endif
- for (i = 0; i < 1344; i++)
- fscanf(input, "%lf", &gauss_dat_host[i]);
- printf("Wczytano punkty Gaussa\n");
- for (i = 0; i < size_shap; i++)
- fscanf(input, "%lf", &shape_fun_host[i]);
- printf("Wczytano funkcje ksztaltu\n");
- for (i = 0; i < size_in; i++)
- fscanf(input, "%lf", &el_data_in[i]);
- printf("Wczytano parametry poczatkowe\n");
- fclose(input);
- #ifdef TEST
- output = fopen("output_TEST_PRISM_NORMAL.txt", "r");
- printf("Otwarcie outputu\n");
- for (i = 0; i < size_out; i++)
- fscanf(output, "%lf ", &testing[i]);
- fclose(output);
- #endif
- printf("Start!\n");
- double start = omp_get_wtime();
- pdr_num_int_el_QSS_prism(gauss_dat_host, shape_fun_host,
- el_data_in, el_data_out, Nr_elems_mic,
- size_out, size_in, size_shap, geo_dat_size, nr_coeff, one_el_stiff_mat_size, one_el_load_vec_size);
- printf("Time: %f\n", omp_get_wtime() - start);
- printf("Koniec!\n");
- #ifdef TEST
- SCALAR delta = 0.00001;
- for (i = 0; i < size_out; i++)
- {
- if (fabs(testing[i] - el_data_out[i]) > delta)
- {
- printf("Error in %d element->test=%.10lf != out=%.10lf\n", i, testing[i], el_data_out[i]);
- exit(0);
- }
- }
- printf("Test OK!\n");
- #endif
- }
- int pdr_num_int_el_QSS_prism(
- SCALAR *gauss_dat_host, // integration points data of elements having given p
- SCALAR *shape_fun_host, // shape functions on a reference element
- SCALAR *el_data_in, // data for integration of NR_ELEMS_THIS_KERCALL elements
- SCALAR *el_data_out, // result of integration of NR_ELEMS_THIS_KERCALL elements
- const int nr_elem_mic,
- const int size_el_out,
- const int size_el_in,
- const int size_shp,
- const int geo_dat_size,
- const int nr_coeff,
- const int one_el_stiff_mat_size,
- const int one_el_load_vec_size)
- {
- int ielem;
- register int offset;
- #pragma omp parallel for private(ielem, offset)
- for (ielem = 0; ielem < nr_elem_mic; ielem++)
- {
- SCALAR tab_fun_u_derx[NSHAP];
- SCALAR tab_fun_u_dery[NSHAP];
- SCALAR tab_fun_u_derz[NSHAP];
- SCALAR stiff_mat[NDOFS * NDOFS];
- SCALAR load_vec[NDOFS];
- int i;
- //-------------------------------------------------------------
- // ******************* READING INPUT DATA *********************
- offset = nr_elem_mic * geo_dat_size + ielem * nr_coeff;
- register SCALAR coeff00 = el_data_in[offset + 0];
- register SCALAR coeff01 = el_data_in[offset + 1];
- register SCALAR coeff02 = el_data_in[offset + 2];
- register SCALAR coeff10 = el_data_in[offset + 3];
- register SCALAR coeff11 = el_data_in[offset + 4];
- register SCALAR coeff12 = el_data_in[offset + 5];
- register SCALAR coeff20 = el_data_in[offset + 6];
- register SCALAR coeff21 = el_data_in[offset + 7];
- register SCALAR coeff22 = el_data_in[offset + 8];
- register SCALAR coeff30 = el_data_in[offset + 9];
- register SCALAR coeff31 = el_data_in[offset + 10];
- register SCALAR coeff32 = el_data_in[offset + 11];
- register SCALAR coeff03 = el_data_in[offset + 12];
- register SCALAR coeff13 = el_data_in[offset + 13];
- register SCALAR coeff23 = el_data_in[offset + 14];
- register SCALAR coeff33 = el_data_in[offset + 15];
- register SCALAR coeff04 = el_data_in[offset + 16];
- register SCALAR coeff14 = el_data_in[offset + 17];
- register SCALAR coeff24 = el_data_in[offset + 18];
- register SCALAR coeff34 = el_data_in[offset + 19];
- // ******* THE END OF: READING INPUT DATA *********************
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //******************** INITIALIZING SM AND LV ******************//
- #pragma omp parallel for private(i) shared(stiff_mat)
- for (i = 0; i < NDOFS * NDOFS; i++)
- stiff_mat[i] = zero;
- #pragma omp parallel for private(i) shared(load_vec)
- for (i = 0; i < NDOFS; i++)
- load_vec[i] = zero;
- //******************** END OF: INITIALIZING SM AND LV ******************//
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //************************* LOOP OVER INTEGRATION POINTS ************************//
- // in a loop over gauss points
- int igauss;
- int idof, jdof;
- #pragma omp parallel private(igauss) shared(idof, jdof, load_vec, stiff_mat)
- for (igauss = 0; igauss < NGAUSS; igauss++)
- {
- SCALAR daux = gauss_dat_host[4 * igauss];
- SCALAR faux = gauss_dat_host[4 * igauss + 1];
- SCALAR eaux = gauss_dat_host[4 * igauss + 2];
- SCALAR vol = weight_linear_prism; // vol = weight CONSTANT FOR LINEAR PRISM!!!
- //-------------------------------------------------------------
- //************************* JACOBIAN TERMS CALCULATIONS *************************//
- SCALAR temp1 = zero;
- SCALAR temp2 = zero;
- SCALAR temp3 = zero;
- SCALAR temp4 = zero;
- SCALAR temp5 = zero;
- SCALAR temp6 = zero;
- SCALAR temp7 = zero;
- SCALAR temp8 = zero;
- SCALAR temp9 = zero;
- { // block to indicate the scope of jac_x registers
- SCALAR jac_0 = zero;
- SCALAR jac_1 = zero;
- SCALAR jac_2 = zero;
- SCALAR jac_3 = zero;
- SCALAR jac_4 = zero;
- SCALAR jac_5 = zero;
- SCALAR jac_6 = zero;
- SCALAR jac_7 = zero;
- SCALAR jac_8 = zero;
- { // block to indicate the scope of jac_data
- SCALAR jac_data[3 * NGEO];
- jac_data[0] = -(one - eaux) * half;
- jac_data[1] = (one - eaux) * half;
- jac_data[2] = zero;
- jac_data[3] = -(one + eaux) * half;
- jac_data[4] = (one + eaux) * half;
- jac_data[5] = zero;
- jac_data[6] = -(one - eaux) * half;
- jac_data[7] = zero;
- jac_data[8] = (one - eaux) * half;
- jac_data[9] = -(one + eaux) * half;
- jac_data[10] = zero;
- jac_data[11] = (one + eaux) * half;
- jac_data[12] = -(one - daux - faux) * half;
- jac_data[13] = -daux * half;
- jac_data[14] = -faux * half;
- jac_data[15] = (one - daux - faux) * half;
- jac_data[16] = daux * half;
- jac_data[17] = faux * half;
- /* Jacobian matrix J */
- offset = ielem * geo_dat_size;
- #pragma omp parallel for private(i) shared(jac_data, el_data_in) reduction(+ \
- : temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9)
- for (i = 0; i < NGEO; i++)
- {
- jac_1 = jac_data[i];
- jac_2 = jac_data[NGEO + i];
- jac_3 = jac_data[2 * NGEO + i];
- jac_4 = el_data_in[offset + 3 * i]; //node coor
- jac_5 = el_data_in[offset + 3 * i + 1];
- jac_6 = el_data_in[offset + 3 * i + 2];
- //printf("el_data_in_geo_inside[%d]=%lf\n",offset+3*i,el_data_in[offset+3*i]); //ok
- temp1 += jac_4 * jac_1;
- temp2 += jac_4 * jac_2;
- temp3 += jac_4 * jac_3;
- temp4 += jac_5 * jac_1;
- temp5 += jac_5 * jac_2;
- temp6 += jac_5 * jac_3;
- temp7 += jac_6 * jac_1;
- temp8 += jac_6 * jac_2;
- temp9 += jac_6 * jac_3;
- }
- } // the end of scope for jac_data
- jac_0 = (temp5 * temp9 - temp8 * temp6);
- jac_1 = (temp8 * temp3 - temp2 * temp9);
- jac_2 = (temp2 * temp6 - temp3 * temp5);
- daux = temp1 * jac_0 + temp4 * jac_1 + temp7 * jac_2;
- /* Jacobian calculations - |J| and inverse of the Jacobian matrix*/
- vol *= daux; // vol = weight * det J
- faux = one / daux;
- jac_0 *= faux;
- jac_1 *= faux;
- jac_2 *= faux;
- jac_3 = (temp6 * temp7 - temp4 * temp9) * faux;
- jac_4 = (temp1 * temp9 - temp7 * temp3) * faux;
- jac_5 = (temp3 * temp4 - temp1 * temp6) * faux;
- jac_6 = (temp4 * temp8 - temp5 * temp7) * faux;
- jac_7 = (temp2 * temp7 - temp1 * temp8) * faux;
- jac_8 = (temp1 * temp5 - temp2 * temp4) * faux;
- //************* THE END OF: JACOBIAN TERMS CALCULATIONS *************************//
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //***** SEPARATE COMPUTING OF ALL GLOBAL DERIVATIVES OF ALL SHAPE FUNCTIONS *****//
- #pragma omp parallel for private(idof, temp1, temp2, temp3) default(shared)
- for (idof = 0; idof < NSHAP; idof++)
- {
- // read proper values of shape functions and their derivatives
- temp1 = shape_fun_host[igauss * NSHAP + idof];
- temp2 = shape_fun_host[NSHAP * NGAUSS + igauss * NSHAP + idof];
- temp3 = shape_fun_host[2 * NSHAP * NGAUSS + igauss * NSHAP + idof];
- tab_fun_u_derx[idof] = temp1 * jac_0 + temp2 * jac_3 + temp3 * jac_6;
- tab_fun_u_dery[idof] = temp1 * jac_1 + temp2 * jac_4 + temp3 * jac_7;
- tab_fun_u_derz[idof] = temp1 * jac_2 + temp2 * jac_5 + temp3 * jac_8;
- } // end loop over shape functions for which global derivatives were computed
- } // the end of block to indicate the scope of jac_x registers
- //*** THE END OF: SEPARATE COMPUTING OF ALL GLOBAL DERIVATIVES OF ALL SHAPE FUNCTIONS ***//
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //***** SUBSTITUTING ACTUAL COEFFICIENTS FOR SM AND LV CALCULATIONS *****//
- //-------------------------------------------------------------
- //********************* first loop over shape functions ***********************//
- //offset = ielem * (one_el_stiff_mat_size + one_el_load_vec_size); //loop invariant code motion
- #pragma omp parallel for private(idof) shared(igauss) reduction(+ \
- : load_vec[:NSHAP])
- for (idof = 0; idof < NSHAP; idof++)
- {
- { // beginning of using registers for u (shp_fun_u, fun_u_der.)
- //-------------------------------------------------------------
- //****** SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ******//
- SCALAR shp_fun_u = shape_fun_host[3 * NSHAP * NGAUSS + igauss * NSHAP + idof];
- SCALAR fun_u_derx = tab_fun_u_derx[idof];
- SCALAR fun_u_dery = tab_fun_u_dery[idof];
- SCALAR fun_u_derz = tab_fun_u_derz[idof];
- //*** THE END OF: SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ***//
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //*** ACTUAL INTERMEDIATE CALCULATIONS FOR IDOF SHAPE FUNCTION ***//
- load_vec[idof] += (
- coeff04 * fun_u_derx +
- coeff14 * fun_u_dery +
- coeff24 * fun_u_derz +
- coeff34 * shp_fun_u
- ) *
- vol;
- //*** THE END OF: ACTUAL CALCULATIONS FOR LOAD VECTOR (AND IDOF SHAPE FUNCTION) ***//
- //-------------------------------------------------------------
- } // the end of using registers for u (shp_fun_u, fun_u_der.)
- //-------------------------------------------------------------
- // ************************* second loop over shape functions ****************************//
- for (jdof = 0; jdof < NSHAP; jdof++)
- {
- //-------------------------------------------------------------
- //****** SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF JDOF SHAPE FUNCTION ******//
- // read proper values of shape functions and their derivatives
- SCALAR shp_fun_v = shape_fun_host[3 * NSHAP * NGAUSS + igauss * NSHAP + jdof];
- SCALAR fun_v_derx = tab_fun_u_derx[jdof];
- SCALAR fun_v_dery = tab_fun_u_dery[jdof];
- SCALAR fun_v_derz = tab_fun_u_derz[jdof];
- //*** THE END OF: SUBSTITUTING OR COMPUTING GLOBAL DERIVATIVES OF IDOF SHAPE FUNCTION ***//
- //-------------------------------------------------------------
- //-------------------------------------------------------------
- //********* ACTUAL FINAL CALCULATIONS FOR SM ENTRY *********//
- stiff_mat[idof * NDOFS + jdof] += (temp4 * fun_v_derx +
- temp5 * fun_v_dery +
- temp6 * fun_v_derz +
- temp7 * shp_fun_v) *
- vol;
- //*** THE END OF: ACTUAL FINAL CALCULATIONS FOR SM ENTRY ***//
- //-------------------------------------------------------------
- } //jdof
- } //idof
- } //gauss
- // // ******** THE END OF: loop over integration points ********//
- // //-------------------------------------------------------------
- offset = ielem * (one_el_stiff_mat_size + one_el_load_vec_size);
- #pragma omp parallel for private(i) shared(offset, stiff_mat, el_data_out)
- for (i = 0; i < NDOFS * NDOFS; i++)
- el_data_out[offset + i] = stiff_mat[i];
- #pragma omp parallel for private(i) shared(offset, load_vec, el_data_out)
- for (i = 0; i < NDOFS; i++)
- el_data_out[offset + one_el_stiff_mat_size + i] = load_vec[i];
- } // the end of loop over elements
- return 1;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement