// Untitled

#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#define THREAD_COUNT 4
#define SIZE 2000

/*
 * Benchmarks a dense SIZE x SIZE matrix product (result = mat2 * mat).
 *
 * The outer (row) loop is work-shared across THREAD_COUNT OpenMP threads.
 * Two timings are printed:
 *   - "User CPU time": wall-clock time elapsed around the whole parallel loop.
 *   - "Real CPU time": sum over all rows of the wall-clock time each thread
 *     spent computing that row (i.e. time aggregated across threads).
 * MFLOPS is derived from the elapsed wall-clock time, counting one multiply
 * and one add per innermost iteration.
 *
 * Command-line arguments are not used. Returns 0 on completion.
 */
int main (int argc, char *argv[]) {

  double** mat2 = new double*[SIZE];
  double** mat = new double*[SIZE];
  double** result = new double*[SIZE]; // product matrix: result = mat2 * mat
  double start, end;
  // Must start at zero: it is the target of the reduction below.
  double realCPUTime = 0.0;
  double userCPUTime;

  // Show how many threads a parallel region would use before we override it.
  // omp_get_thread_num() only returns the caller's thread id, which is always
  // 0 outside a parallel region, so it cannot be used as a thread count.
  printf("Number of active threads is: %d.\n", omp_get_max_threads());

  // set the specified number of threads to execute the parallel segment
  omp_set_num_threads(THREAD_COUNT);

  // Allocate one row at a time; operator new throws on failure, so no
  // null checks are required here.
  for (int row = 0; row < SIZE; row++) {
    mat[row] = new double[SIZE];
    mat2[row] = new double[SIZE];
    result[row] = new double[SIZE];
  }

  printf("Initializing arrays with random numbers.\n");
  for (int row = 0; row < SIZE; row++) {
    for (int col = 0; col < SIZE; col++) {
      mat[row][col] = (double)rand() / 100;
      mat2[row][col] = (double)rand() / 100;
      result[row][col] = 0;
    }
  }

  start = omp_get_wtime();
  /*
   * Each thread handles a disjoint set of rows of result, so the writes to
   * result never conflict. Loop indices and the per-row timestamp are declared
   * inside the loop and are therefore private to each thread. realCPUTime is
   * combined with a reduction so the per-thread sums are added without a race.
   */
  #pragma omp parallel for shared(result, mat2, mat) reduction(+:realCPUTime)
  for (int i = 0; i < SIZE; i++) {
    double rowStart = omp_get_wtime();
    for (int j = 0; j < SIZE; j++) {
      for (int k = 0; k < SIZE; k++) {
        result[i][j] += mat2[i][k] * mat[k][j];
      }
    }
    realCPUTime += omp_get_wtime() - rowStart;
  }
  end = omp_get_wtime();

  userCPUTime = end - start;

  printf("User CPU time took %f seconds.\n", userCPUTime);
  printf("Real CPU time took %f seconds.\n", realCPUTime);

  // 2 * SIZE^3 floating-point operations; computed in double so that the
  // product cannot overflow int when SIZE is increased.
  double mflops = 2.0 * SIZE * SIZE * SIZE / (userCPUTime * 1000000);

  printf("MFLOPS: %f\n", mflops);

  // Release every row, then the row-pointer arrays themselves.
  for (int row = 0; row < SIZE; row++) {
    delete[] mat[row];
    delete[] mat2[row];
    delete[] result[row];
  }
  delete[] mat;
  delete[] mat2;
  delete[] result;

  return 0;
}