Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- A very simple mex-implementation for testing the Fliege's algorithm.
- Copyright (C) 2013 Antti Lipponen (Antti.Lipponen@iki.fi)
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- -------------------------------------------------------------------------
- The algorithm implemented here is based on the algorithm published in
- "A Randomized Parallel Algorithm with Run Time $O(n^2)$ for Solving an
- $n \times n$ System of Linear Equations", http://arxiv.org/abs/1209.3995v1
- I also took some ideas from Python code by Riccardo Murri and Benjamin Jonen (thanks!),
- https://code.google.com/p/gc3pie/source/browse/#svn%2Ftrunk%2Fgc3pie%2Fexamples%2Foptimizer%2Flinear-systems-solver
- TO RUN THE TEST:
- ****************
- Compile to mex-file in Matlab for example with:
- mex solveLinmex.cpp CXXFLAGS="\$CFLAGS -fopenmp" LDFLAGS="\$LDFLAGS -fopenmp" LDOPTIMFLAGS="-O3" CXXOPTIMFLAGS="-O3 -DNDEBUG"
- Test script:
- N = 1000;
- A = randn(N,N);
- b = randn(N,1);
- tic
- x_backslash = A\b;
- toc
- tic
- x_fliege = solveLinmex(A',b, randn(N,N+1));
- toc
- Result:
- Elapsed time for x_backslash is 0.057052 seconds.
- Elapsed time for x_fliege is 0.655128 seconds.
- */
- #include "mex.h"
- void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
- {
- const unsigned int n = (int) mxGetM(prhs[0]);
- const unsigned int m = (int) mxGetN(prhs[0]);
- const double * const A = mxGetPr(prhs[0]);
- const double * const b = mxGetPr(prhs[1]);
- double * const v1 = mxGetPr(prhs[2]);
- double * const v2 = new double[(n+1)*n];
- double * const av = new double[n+1];
- for(unsigned int k=0; k<m; ++k) {
- const double * const AA = A + k*n;
- const double * const bk = b + k;
- double * const vcurrent = (k%2) ? v2 : v1;
- double * const xcurrent = (k%2) ? v1 : v2;
- #pragma omp parallel for
- for(unsigned int i=0; i<n+1; ++i) {
- const double * const vv = vcurrent + i*n;
- av[i] = 0.0;
- for(unsigned int j=0; j<n; ++j) {
- av[i] += AA[j]*vv[j];
- }
- }
- #pragma omp parallel for
- for(unsigned int i=0; i<n+1; ++i) {
- const double * const vv = vcurrent + ((i<n) ? (i+1)*n : 0);
- const double * const uu = vcurrent + i*n;
- double * const xx = xcurrent + i*n;
- const double t = ((*bk)-av[i])/(av[(i<n) ? (i+1) : 0]-av[i]);
- for(unsigned int j=0; j<n; ++j) {
- xx[j] = t*(vv[j]-uu[j]) + uu[j];
- }
- }
- }
- plhs[0] = mxCreateDoubleMatrix(n, 1, mxREAL);
- double *xvec = mxGetPr(plhs[0]);
- double * const vfinal = (m%2) ? v2 : v1;
- for(unsigned int i=0; i<m; ++i) {
- xvec[i] = vfinal[i];
- }
- delete [] av;
- delete [] v2;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement