SHOW:
|
|
- or go back to the newest paste.
| 1 | ## file foo.f90 ## | |
| 2 | function pixsolve(A, b) result(x) | |
| 3 | implicit none | |
| 4 | real*8 :: A(:,:,:), b(:,:), x(size(b,1),size(b,2)) | |
| 5 | integer*4 :: i, n, m, piv(size(b,2)), err | |
| 6 | n = size(A,1) | |
| 7 | m = size(A,2) | |
| 8 | x = b | |
| 9 | do i = 1, n | |
| 10 | call dgesv(m, 1, A(i,:,:), m, piv, x(i,:), m, err) | |
| 11 | end do | |
| 12 | end function | |
| 13 | ||
| 14 | - | ! f2py -c -m foo{,.f90} -llapack -lblas
|
| 14 | + | ## file foo_faster.f90 ## |
| 15 | - | ! python |
| 15 | + | |
| 16 | - | ! >>> import numpy as np |
| 16 | + | |
| 17 | - | ! >>> from foo import pixsolve |
| 17 | + | |
| 18 | - | ! >>> n = 6 |
| 18 | + | integer*4 :: i, n, m, piv(size(b,1)), err |
| 19 | - | ! >>> A = np.random.rand(n, 3, 3) |
| 19 | + | n = size(A,3) |
| 20 | - | ! >>> b = np.random.rand(n, 3) |
| 20 | + | m = size(A,1) |
| 21 | - | ! >>> x = pixsolve(A, b) # slightly faster (5%) if A, b are in Fortran order |
| 21 | + | |
| 22 | - | ! >>> max(np.linalg.norm(np.dot(A[i], x[i]) - b[i]) for i in xrange(n)) |
| 22 | + | |
| 23 | - | ! 2.603703785810335e-16 |
| 23 | + | call dgesv(m, 1, A(:,:, i), m, piv, x(:, i), m, err) |
| 24 | end do | |
| 25 | end function | |
| 26 | ||
| 27 | ## Run f2py ## | |
| 28 | f2py -c -m foo{,.f90} -llapack -lblas
| |
| 29 | f2py -c -m foo_faster{,.f90} -llapack -lblas
| |
| 30 | ||
| 31 | ## file test.ipy (to be ran in IPython) ## | |
| 32 | import numpy as np | |
| 33 | from foo import pixsolve | |
| 34 | from foo_faster import pixsolve as pixsolve_faster | |
| 35 | ||
| 36 | n = 600 | |
| 37 | A = np.random.rand(n, 3, 3) | |
| 38 | b = np.random.rand(n, 3) | |
| 39 | A0 = A.copy() # Store a copy to check solutions, dgesv modifies the input matrix | |
| 40 | ||
| 41 | x1 = pixsolve(A, b) # slightly faster (5%) if A, b were in Fortran order | |
| 42 | print max(np.linalg.norm(np.dot(A0[i], x1[i]) - b[i]) for i in xrange(n)) | |
| 43 | ||
| 44 | A = A0.copy() | |
| 45 | x2 = pixsolve_faster(A.T, b.T).T | |
| 46 | # But you're solving for the transpose of each of your matrices!! | |
| 47 | print max(np.linalg.norm(np.dot(A0[i].T, x2[i]) - b[i]) for i in xrange(n)) | |
| 48 | ||
| 49 | A = A0.copy() | |
| 50 | x3 = pixsolve_faster(A.transpose((1, 2, 0)), b.T).T | |
| 51 | print max(np.linalg.norm(np.dot(A0[i], x3[i]) - b[i]) for i in xrange(n)) | |
| 52 | ||
| 53 | ||
| 54 | %timeit pixsolve(A, b) | |
| 55 | %timeit pixsolve_faster(A.T, b.T).T | |
| 56 | %timeit pixsolve_faster(A.transpose((1, 2, 0)), b.T).T | |
| 57 | ||
| 58 | ## Results ## | |
| 59 | 3.47321633195e-14 | |
| 60 | 4.0549756099e-14 | |
| 61 | 3.47321633195e-14 | |
| 62 | 1000 loops, best of 3: 830 us per loop | |
| 63 | 1000 loops, best of 3: 655 us per loop | |
| 64 | 1000 loops, best of 3: 688 us per loop |