Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## file foo.f90 ##
- function pixsolve(A, b) result(x)
- implicit none
- real*8 :: A(:,:,:), b(:,:), x(size(b,1),size(b,2))
- integer*4 :: i, n, m, piv(size(b,2)), err
- n = size(A,1)
- m = size(A,2)
- x = b
- do i = 1, n
- call dgesv(m, 1, A(i,:,:), m, piv, x(i,:), m, err)
- end do
- end function
- ## file foo_faster.f90 ##
- function pixsolve(A, b) result(x)
- implicit none
- real*8 :: A(:,:,:), b(:,:), x(size(b,1),size(b,2))
- integer*4 :: i, n, m, piv(size(b,1)), err
- n = size(A,3)
- m = size(A,1)
- x = b
- do i = 1, n
- call dgesv(m, 1, A(:,:, i), m, piv, x(:, i), m, err)
- end do
- end function
- ## Run f2py ##
- f2py -c -m foo{,.f90} -llapack -lblas
- f2py -c -m foo_faster{,.f90} -llapack -lblas
- ## file test.ipy (to be ran in IPython) ##
- import numpy as np
- from foo import pixsolve
- from foo_faster import pixsolve as pixsolve_faster
- n = 600
- A = np.random.rand(n, 3, 3)
- b = np.random.rand(n, 3)
- A0 = A.copy() # Store a copy to check solutions, dgesv modifies the input matrix
- x1 = pixsolve(A, b) # slightly faster (5%) if A, b were in Fortran order
- print max(np.linalg.norm(np.dot(A0[i], x1[i]) - b[i]) for i in xrange(n))
- A = A0.copy()
- x2 = pixsolve_faster(A.T, b.T).T
- # But you're solving for the transpose of each of your matrices!!
- print max(np.linalg.norm(np.dot(A0[i].T, x2[i]) - b[i]) for i in xrange(n))
- A = A0.copy()
- x3 = pixsolve_faster(A.transpose((1, 2, 0)), b.T).T
- print max(np.linalg.norm(np.dot(A0[i], x3[i]) - b[i]) for i in xrange(n))
- %timeit pixsolve(A, b)
- %timeit pixsolve_faster(A.T, b.T).T
- %timeit pixsolve_faster(A.transpose((1, 2, 0)), b.T).T
- ## Results ##
- 3.47321633195e-14
- 4.0549756099e-14
- 3.47321633195e-14
- 1000 loops, best of 3: 830 us per loop
- 1000 loops, best of 3: 655 us per loop
- 1000 loops, best of 3: 688 us per loop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement