Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <omp.h>
- #include <math.h>
- #include <stdio.h>
- #include <stdlib.h>
- void integrate_cpu(int q, int n, float *a, float *b, float *z) {
- #pragma omp parallel
- {
- int i, j;
- float h, w, x;
- float res;
- #pragma omp for
- for (i = 0; i < n; ++i) {
- res = 0.0f;
- h = (b[i] - a[i]) / (q - 1);
- w = (b[i] - a[i]) / q;
- for (j = 0; j < q; ++j) {
- x = a[i] + j * h;
- res += 1.0f / sqrtf(x);
- }
- z[i] = w * res;
- if (omp_get_thread_num() == 0 && 100ul * (i + 1) / n != 100ul * i / n) {
- printf(" [% 3d%%]\r", omp_get_num_threads() * 100ul * (i + 1) / n);
- fflush (stdout);
- }
- }
- }
- }
/*
 * Driver: builds n random integration intervals, computes the closed-form
 * reference value for each one and runs the CPU quadrature on them.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a work buffer cannot be allocated.
 */
int main(void)
{
    const int q = 1000;    /* sample points per interval */
    const int n = 1 << 25; /* number of intervals */
    const size_t count = (size_t) n;
    int status = EXIT_FAILURE;
    int i;

    float *a = malloc(count * sizeof *a);
    float *b = malloc(count * sizeof *b);
    float *z_exact = malloc(count * sizeof *z_exact);
    float *z_cpu = malloc(count * sizeof *z_cpu);

    /* The buffers are large, so an allocation failure is a realistic case. */
    if (a == NULL || b == NULL || z_exact == NULL || z_cpu == NULL) {
        fprintf(stderr, "error: could not allocate work buffers\n");
        goto cleanup;
    }

    /* Fixed seed so every run uses the same intervals. */
    srand(42);

    /*
     * Init intervals: lower bound a[i] in [1, 2], upper bound b[i] in
     * [2.5, 3.5], so a[i] < b[i] always holds.
     */
    for (i = 0; i < n; ++i) {
        a[i] = 1.0f + (float) rand() / (float) RAND_MAX;
        b[i] = 2.5f + (float) rand() / (float) RAND_MAX;
    }

    /*
     * Exact value of \int_a^b 1 / sqrt(x) dx = 2 * (sqrt(b) - sqrt(a))
     */
    for (i = 0; i < n; ++i) {
        z_exact[i] = 2.0f * (sqrtf(b[i]) - sqrtf(a[i]));
    }
    printf("exact:\n");

    /*
     * CPU computation of the integrals.
     */
    integrate_cpu(q, n, a, b, z_cpu);
    status = EXIT_SUCCESS;

cleanup:
    /* free(NULL) is a no-op, so this is safe after a partial allocation. */
    free(z_cpu);
    free(z_exact);
    free(b);
    free(a);
    return status;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement