#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <omp.h>
#include "papi.h"
// Forward declaration; the definition is not in this part of the file.
// main() calls it with the shared matrix buffer and the current dimension
// `num`, after filling the first num * num elements of the buffer.
// NOTE(review): presumably an in-place LU decomposition of a num x num
// matrix -- confirm against the definition.
void
LU (double * mat,int num);
// Exponent of the largest problem size: main() runs num = 2^1 .. 2^SIZE.
#define SIZE 11
// Storage for the largest case, (2^SIZE) * (2^SIZE) doubles; smaller runs
// only use the leading num * num elements.
double mat[(1 << SIZE) * (1 << SIZE)];
/* Thread-id callback for PAPI_thread_init.  PAPI requires a value that is
 * unique per thread; the OpenMP thread number satisfies that, whereas the
 * team size (omp_get_num_threads) is identical on every thread. */
static unsigned long
papi_thread_id (void)
{
  return (unsigned long) omp_get_thread_num ();
}

/* Print `what` plus the PAPI error text and terminate when `ret` is not
 * PAPI_OK; otherwise do nothing. */
static void
papi_check (int ret, const char *what)
{
  if (ret != PAPI_OK) {
    fprintf (stderr, "%s: %s\n", what, PAPI_strerror (ret));
    exit (1);
  }
}

/*
 * Benchmark driver: for every matrix dimension num = 2^1 .. 2^SIZE, fill the
 * global buffer `mat` with random values, run LU() inside an OpenMP parallel
 * region, and print one tab-separated line with
 *   num, wall-clock seconds, virtual (process) seconds of the calling thread,
 *   PAPI_FP_OPS summed over all threads, and the derived MFlops figure.
 *
 * Returns 0 on success; exits with status 1 on any PAPI failure.
 */
int
main (void)
{
  int i;
  int nthreads = 1;

  srand (time (NULL));

  // Initialise the PAPI library and its thread support.
  if (PAPI_library_init (PAPI_VER_CURRENT) != PAPI_VER_CURRENT) {
    fprintf (stderr, "PAPI library init error!\n");
    exit (1);
  }
  if (PAPI_thread_init (papi_thread_id) != PAPI_OK) {
    fprintf (stderr, "PAPI thread init error.\n");
    exit (1);
  }
  if (PAPI_num_counters () < 2) {
    fprintf (stderr, "No hardware counters here, or PAPI not supported.\n");
    exit (1);
  }

  puts ("Size\tReal_Time\tProcess_Time\tFLPops\tMFlops");

  // Probe the team size once; only thread 0 writes the shared variable.
#pragma omp parallel
  {
    if (omp_get_thread_num () == 0)
      nthreads = omp_get_num_threads ();
  }

  for (i = 1; i <= SIZE; ++i) {
    float rtime, ptime, mflops;
    long long flpops = 0;
    const int num = 1 << i;
    int j;
    long long values[nthreads];   // one floating-point-op count per thread
    struct timeval start, end;
    long long v_start, v_end;

    // Fill the matrix with arbitrary values.
    for (j = 0; j < num * num; ++j) {
      mat[j] = rand () / 1000.0;
    }
    for (j = 0; j < nthreads; j++) {
      values[j] = 0;
    }

    // Take the start timestamps.
    v_start = PAPI_get_virt_usec ();
    gettimeofday (&start, NULL);

    // Cap the team at `nthreads` so indexing `values` by thread number
    // can never run past the end of the array.
#pragma omp parallel num_threads(nthreads)
    {
      // PAPI event sets belong to the thread that created them, so every
      // thread counts floating-point operations with its own set.
      // The per-thread setup is inside the timed region; it is small but not
      // free for the tiniest matrices.
      int event_set = PAPI_NULL;
      long long count = 0;

      papi_check (PAPI_create_eventset (&event_set),
                  "PAPI failed to create event set");
      papi_check (PAPI_add_event (event_set, PAPI_FP_OPS),
                  "PAPI failed to add PAPI_FP_OPS");
      papi_check (PAPI_start (event_set),
                  "PAPI failed to start counters");

      LU (mat, num);

      // Stop counting and record this thread's total.
      papi_check (PAPI_stop (event_set, &count),
                  "PAPI failed to read counters");
      values[omp_get_thread_num ()] = count;

      // Release the event set so repeated iterations do not leak them.
      papi_check (PAPI_cleanup_eventset (event_set),
                  "PAPI failed to clean up event set");
      papi_check (PAPI_destroy_eventset (&event_set),
                  "PAPI failed to destroy event set");
    }

    // Take the end timestamps.
    gettimeofday (&end, NULL);
    v_end = PAPI_get_virt_usec ();
    rtime = (end.tv_sec - start.tv_sec) + 0.000001 * (end.tv_usec - start.tv_usec);
    ptime = (v_end - v_start) * 1.0e-6;

    // Sum the per-thread operation counts.
    for (j = 0; j < nthreads; j++) {
      flpops += values[j];
    }

    // Derive MFlops; report 0 when the elapsed virtual time is too small
    // to measure instead of dividing by zero.
    mflops = (ptime > 0.0f) ? flpops / ptime * 1.0e-6 : 0.0f;
    printf ("%d\t%f\t%f\t%lld\t%f\n", num, rtime, ptime, flpops, mflops);
  }

  return 0;
}