Алгоритм Ланцоша с выборочной ортогонализацией

//Lanshotz algorithm
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <math.h>
using namespace std;

void init_vec(double *obj,int N, int is_mat) {
    srand(time(NULL));
    if (is_mat == 0) {
        for (int k=0; k<N; ++k) {
            obj[k] = rand() % 10;
        }
    }
    else {
        for (int k = 0; k < N; ++k) {
            for (int p = k; p < N; ++p) {
                obj[k*N+p] = rand() % 10;
                obj[p*N+k] = obj[k*N+p];
            }
        }
    }
}


void dotproduct(double *a, double *b, double *res, int N) {
    double result = 0;
    for (int i = 0; i < N; ++i) {
        result += a[i]*b[i];
    }
    *res = result;
}

void constproduct(double *a, double b, double *res, int N) {
    for (int i = 0; i < N; ++i) {
        res[i] = a[i]*b;
    }
}

void sum(double *a, double *b, double *res, int N) {
    for (int i = 0; i < N; ++i) {
        res[i] = a[i] + b[i];
    }
}

void diff(double *a, double *b, double *res, int N) {
    for (int i = 0; i < N; ++i) {
        res[i] = a[i] - b[i];
    }
}

void ort(double *v,double *vec_storage, int curent_iter, int N) {
    double *c = (double*)calloc(N, sizeof(double));
    double *s = (double*)calloc(N, sizeof(double));
    double ress = 0;
    for (int j = 0; j<curent_iter;j++) {
        dotproduct(v, &vec_storage[N*j], &ress, N);
        constproduct(&vec_storage[N*j], ress, c, N);// v[j],scalarproduct(w,v[j],num,procnum),c,num,procnum);
        sum(s, c, s, N);
    }
    constproduct(s, -1, s, N);
    sum(v, s, v, N);
}


void matvec(double *vec, int N, int loc_rows, double *res, int MAX_ITER) {
    double t2 = MPI_Wtime();
//  int MAX_ITER = 20;
    double *local_mat = new double[loc_rows*N];
    double *local_res = new double[loc_rows];
    double *total_res;// = new double[N];
    double *vec_storage;
    int rank;

    double *a = new double[MAX_ITER];
    double *b = new double[MAX_ITER];


    double t3 = MPI_Wtime();
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) {
        vec_storage = new double[MAX_ITER*N];
        for (int k = 0; k < N; ++k) {
            vec_storage[k] = vec[k];
        }
    }

    //MPI_Scatter(&mat[0], loc_rows*N, MPI_DOUBLE, &local_mat[0], loc_rows*N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    for (int ii = 0; ii < loc_rows; ii++) {
        for (int jj =0; jj < N; ++ jj) {
            local_mat[ii*N+jj]= (ii*jj) / sqrt(ii*ii + jj*jj);
        }
    }


    for (int iter = 1; iter < MAX_ITER-1; iter ++) {
        MPI_Bcast(&vec[0], N, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        t3 = MPI_Wtime() - t3;
        //std::cout << "DATA_TRANSFER_PROC_" << rank << ":" << t3 << std::endl;
        for (int i =0; i < loc_rows; ++i) {
            local_res[i] = 0;
            for (int j = 0; j<N;++j) {
                local_res[i] += local_mat[i*N+j] * vec[j];
            }
        }
        if (rank == 0) {
            total_res = new double[N];
        }
        MPI_Gather(&local_res[0], loc_rows, MPI_DOUBLE, &total_res[0], loc_rows, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        if (rank == 0) {
            for (int i = 0; i < N; ++i) {
                vec[i] = total_res[i];
            }
            double *c = new double[N];


            dotproduct(&vec_storage[N*iter],total_res, &a[iter],N);

            constproduct(&vec_storage[N*iter], a[iter], c, N);
            diff(total_res,c,total_res,N);

            if (iter!=0) {
                constproduct(&vec_storage[N*(iter-1)],b[iter],c,N);
                diff(total_res ,c,total_res ,N);
            }
            ort(total_res,vec_storage,iter,N);
            ort(total_res,vec_storage,iter,N);
            double d;
            dotproduct(total_res,total_res,&d,N);
            b[iter+1]=sqrt(d);
            if (b[iter+1]==0) {
                //cout<<"just"<<iter<<endl;
                // k=iter;
                break;
            }
            constproduct(total_res,1/b[iter+1], &vec_storage[N*(iter+1)],N);
        }
    }
    if (rank == 0) {
        delete[] vec_storage;
        delete[] total_res;

    }
    delete[] local_res;
    delete[] local_mat;

        /*if (rank == 0){
//          dotproduct()
            for (int i =0; i < N; ++i) {
                res[i] = total_res[i];
            }
        }*/
    t2 = MPI_Wtime()- t2;
    //cout <<"INNER_PROC_" << rank <<": "<< t2 - t3 << endl;

}

int main(int argc, char* argv[]) {
    MPI_Init(&argc,&argv);

    int size = 0;
    int rank = 0;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    for (int MAX_IT = 42; MAX_IT < 50; MAX_IT +=2) {
    for (int dim = 4992; dim <= 49920; dim += 4992){
            double t1 = MPI_Wtime();
            int N = 20000;
            N = dim;
            int *send;
            double *mat;
            double *vec = new double[N];
            double *result;
            if (N % size != 0) {
                MPI_Abort(MPI_COMM_WORLD,10);
                //printf("SIZE mod N_processors != 0\n");
                std::cout << "SIZE mod N_processors != 0" << std::endl;
                return(-1);
            }
            int mat_s;
            if (rank == 0) {
                mat_s = N*N;
                //mat = new double[mat_s];
                //init_vec(mat, mat_s);
                int elems_per_proc = mat_s / size;
                //init_vec(mat, N, 1);
                init_vec(vec, N, 0);
                result = new double[N];
            }
            matvec(vec, N, N/size, result, MAX_IT);
            //int *temp = new int[100];
            if (rank == 0) {
                //FILE * pfile;
                //pfile = fopen("results", "a");
                t1 = MPI_Wtime()- t1;
                //delete[] mat;
                delete[] vec;
                delete[] result;
                //fprintf(pfile, "NCORES: %d. MATRIX SIZE IS %d IT TOOK TIME: %lf\n", size, dim, t1);
                //fclose(pfile);
                //printf("NCORES %d MATRIX SIZE %d TIME %lf\n", size, dim, t1);
                cout <<  size << "," <<dim << "," << MAX_IT << "," <<t1 << endl;
            }

    }
    }

    MPI_Finalize();
    return(0);
}