Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- compile:
- gcc minimal_parallel_compute_distances.c -Wall -g -fopenmp -o minimal
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <sys/time.h>
- #include <omp.h>
/* Seed value for the running minimum in the nearest-centroid search. */
#define DMAX 10000000

/* ---- problem description ---- */
int n = 0;                          /* how many points */
int d = 0;                          /* how many dimensions per point */
int k = 0;                          /* how many clusters */
int p = 0;                          /* how many threads */
double **dataset = NULL;            /* the points, one row per point */

/* ---- timing / throughput bookkeeping ---- */
int iters = 0;
long total_ops = 0;
long total_iters = 0;
double time_spent = 0;
double throughput = 0;
double mean_time_spent = 0;
double mean_throughput = 0;
double mean_total_iters = 0;

/* ---- working buffers for the distance computation ---- */
double **centroids = NULL;          /* k rows of d values */
int *clusters = NULL;               /* n entries: cluster index per point */
double *distances = NULL;           /* n entries: distance per point */
double *dist_sum_threads = NULL;    /* p entries: per-thread distance sum */
long *total_ops_threads = NULL;     /* p entries: per-thread operation count */
int chunk = 0;                      /* chunk size for the static schedule */
double dist_sum = 0;                /* sum over all per-thread distance sums */

/* ---- wall-clock measurements taken with omp_get_wtime() ---- */
double wtime_start = 0;             /* timestamp before the parallel region */
double wtime_end = 0;               /* timestamp after the parallel region */
double wtime_spent = 0;             /* wtime_end - wtime_start */
double mean_wtime_spent = 0;
- double **parallel_compute_distances (double **dataset, int n, int d, int k, long int *total_ops) {
- double dist=0, error=0, mindist=0;
- int cn, cd, ck, mink, id, cp;
- // reset before parallel region
- dist_sum = 0;
- // -- start time --
- wtime_start = omp_get_wtime ();
- // parallel loop
- # pragma omp parallel shared(distances, clusters, centroids, dataset, chunk, dist_sum, dist_sum_threads) private(id, cn, ck, cd, cp, error, dist, mindist, mink)
- {
- id = omp_get_thread_num();
- dist_sum_threads[id] = 0; // reset
- // 2. recompute distances against centroids
- # pragma omp for schedule(static,chunk)
- for (cn=0; cn<n; cn++) {
- mindist = DMAX;
- mink = 0;
- for (ck=0; ck<k; ck++) {
- dist = 0;
- for (cd=0; cd<d; cd++) {
- error = dataset[cn][cd] - centroids[ck][cd];
- dist = dist + (error * error); total_ops_threads[id]++;
- }
- if (dist < mindist) {
- mindist = dist;
- mink = ck;
- }
- }
- distances[cn] = mindist;
- clusters[cn] = mink;
- dist_sum_threads[id] += mindist;
- }
- // bad parallel reduction
- //#pragma omp parallel for reduction(+:dist_sum)
- //for (cp=0; cp<p; cp++){
- // dist_sum += dist_sum_threads[cp];
- //}
- }
- // -- end time --
- wtime_end = omp_get_wtime ();
- // -- total wall time --
- wtime_spent = wtime_end - wtime_start;
- // sequential reduction
- for (cp=0; cp<p; cp++)
- dist_sum += dist_sum_threads[cp];
- // stats
- *(total_ops) = 0;
- for (cp=0; cp<p; cp++)
- *(total_ops) += total_ops_threads[cp];
- return centroids;
- }
/*
 * Releases a matrix stored as an array of n separately allocated rows:
 * each row is freed first, then the array of row pointers itself.
 * A NULL matrix only produces a message on stdout.
 */
void free_matrix(void **matrix, int n) {
    if (matrix == NULL) {
        printf("No matrix to release");
        return;
    }

    int row = 0;
    while (row < n) {
        free(matrix[row]);
        row++;
    }

    free(matrix);
}
/*
 * Writes an n x m matrix of doubles to stdout, one row per line, each value
 * formatted with two decimals and followed by a space.
 * A NULL matrix only produces a message on stdout.
 */
void print_matrix(double **matrix, int n, int m) {
    if (matrix == NULL) {
        printf("No matrix to display");
        return;
    }

    int row = 0;
    while (row < n) {
        int col = 0;
        while (col < m) {
            printf("%.2f ", matrix[row][col]);
            col++;
        }
        printf("\n");
        row++;
    }
}
- void load_data () {
- int cn = 0, cd = 0, ck = 0;
- // init dataset
- dataset = (double**) malloc( sizeof(double*) * n);
- if (!dataset) {
- printf("Error on memory allocation");
- exit(1);
- }
- for (cn=0; cn<n; cn++) {
- dataset[cn] = (double *) malloc( sizeof(double) * d);
- if (!dataset[cn]) {
- printf("Error on memory allocation");
- exit(1);
- }
- for (cd=0; cd<d; cd++)
- dataset[cn][cd] = cn*cd+cn+cd; // some arbitrary number
- }
- printf ("dataset loaded\n");
- // init centroids
- centroids = (double**) malloc( sizeof(double*) * n);
- if (!centroids) {
- printf("Error on memory allocation");
- exit(1);
- }
- for (ck=0; ck<k; ck++) {
- centroids[ck] = (double *) malloc( sizeof(double) * d);
- if (!centroids[ck]) {
- printf("Error on memory allocation");
- exit(1);
- }
- for (cd=0; cd<d; cd++)
- centroids[ck][cd] = ck+1; // [1,..,1], [2,..,2], ..., [16,..,16]
- }
- printf ("centroids loaded\n");
- //print_matrix (dataset, n, d);
- //print_matrix (centroids, k, d);
- }
- void run_parallel_compute_distances (double **dataset, int n, int d, int k, double *time_spent, double *throughput, long int *total_ops) {
- // init memory
- clusters = (int*) malloc (sizeof(int) * n);
- distances = (double*) malloc (sizeof(double) * n);
- dist_sum_threads = (double*) malloc (sizeof(double) * p);
- // init total ops by thread
- total_ops_threads = (long int *) malloc(sizeof(long int) * p);
- int cp;
- for (cp=0; cp<p; cp++)
- total_ops_threads[cp] = 0;
- // run main function
- parallel_compute_distances (dataset, n, d, k, total_ops);
- // stats
- *throughput = (*total_ops/1000000.0); // millions of opers
- *throughput = (*throughput / wtime_spent); // millions of opers / time (seconds)
- // free memory
- free (distances);
- free (clusters);
- free (dist_sum_threads);
- free (total_ops_threads);
- }
- int main (int argc, char *argv[]) {
- // problem size
- n = 100000; // number of points
- d = 40; // number of dimensions
- k = 16; // number of clusters
- // load dataset
- load_data ();
- // define p and chunk
- int id;
- #pragma omp parallel shared(p, chunk)
- {
- id = omp_get_thread_num();
- // set p and chunk
- if (id == 0) {
- p = omp_get_num_threads();
- chunk = n/p;
- }
- }
- // run it
- int i;
- iters = 5;
- for (i=0; i<iters; i++) {
- // reset vars
- wtime_spent = 0;
- throughput = 0;
- total_ops = 0;
- total_iters = 0;
- run_parallel_compute_distances (dataset, n, d, k, &time_spent, &throughput, &total_ops);
- printf ("test: %d, wall time: %.8f, ops: %ld, throughput (millions of opers/sec): %.8f, dist_sum: %.4f\n", i+1, wtime_spent, total_ops, throughput, dist_sum);
- mean_time_spent += time_spent;
- mean_wtime_spent += wtime_spent;
- mean_throughput += throughput;
- mean_total_iters += total_iters;
- }
- mean_time_spent /= iters;
- mean_wtime_spent /= iters;
- mean_throughput /= iters;
- mean_total_iters /= iters;
- printf ("size n: %d\n", n);
- printf ("threads p: %d\n", p);
- printf ("compute distances mean time spent: %.8f\n", mean_time_spent);
- printf ("compute distances mean wall time spent: %.8f\n", mean_wtime_spent);
- printf ("compute distances mean throughput: %.8f\n", mean_throughput);
- // free'em
- free_matrix ((void **) dataset, n);
- free_matrix ((void **) centroids, k);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement