SHOW:
|
|
- or go back to the newest paste.
1 | ## file foo.f90 ## | |
2 | function pixsolve(A, b) result(x) | |
3 | implicit none | |
4 | real*8 :: A(:,:,:), b(:,:), x(size(b,1),size(b,2)) | |
5 | integer*4 :: i, n, m, piv(size(b,2)), err | |
6 | n = size(A,1) | |
7 | m = size(A,2) | |
8 | x = b | |
9 | do i = 1, n | |
10 | call dgesv(m, 1, A(i,:,:), m, piv, x(i,:), m, err) | |
11 | end do | |
12 | end function | |
13 | ||
14 | - | ! f2py -c -m foo{,.f90} -llapack -lblas |
14 | + | ## file foo_faster.f90 ## |
15 | - | ! python |
15 | + | |
16 | - | ! >>> import numpy as np |
16 | + | |
17 | - | ! >>> from foo import pixsolve |
17 | + | |
18 | - | ! >>> n = 6 |
18 | + | integer*4 :: i, n, m, piv(size(b,1)), err |
19 | - | ! >>> A = np.random.rand(n, 3, 3) |
19 | + | n = size(A,3) |
20 | - | ! >>> b = np.random.rand(n, 3) |
20 | + | m = size(A,1) |
21 | - | ! >>> x = pixsolve(A, b) # slightly faster (5%) if A, b are in Fortran order |
21 | + | |
22 | - | ! >>> max(np.linalg.norm(np.dot(A[i], x[i]) - b[i]) for i in xrange(n)) |
22 | + | |
23 | - | ! 2.603703785810335e-16 |
23 | + | call dgesv(m, 1, A(:,:, i), m, piv, x(:, i), m, err) |
24 | end do | |
25 | end function | |
26 | ||
27 | ## Run f2py ## | |
28 | f2py -c -m foo{,.f90} -llapack -lblas | |
29 | f2py -c -m foo_faster{,.f90} -llapack -lblas | |
30 | ||
31 | ## file test.ipy (to be ran in IPython) ## | |
32 | import numpy as np | |
33 | from foo import pixsolve | |
34 | from foo_faster import pixsolve as pixsolve_faster | |
35 | ||
36 | n = 600 | |
37 | A = np.random.rand(n, 3, 3) | |
38 | b = np.random.rand(n, 3) | |
39 | A0 = A.copy() # Store a copy to check solutions, dgesv modifies the input matrix | |
40 | ||
41 | x1 = pixsolve(A, b) # slightly faster (5%) if A, b were in Fortran order | |
42 | print max(np.linalg.norm(np.dot(A0[i], x1[i]) - b[i]) for i in xrange(n)) | |
43 | ||
44 | A = A0.copy() | |
45 | x2 = pixsolve_faster(A.T, b.T).T | |
46 | # But you're solving for the transpose of each of your matrices!! | |
47 | print max(np.linalg.norm(np.dot(A0[i].T, x2[i]) - b[i]) for i in xrange(n)) | |
48 | ||
49 | A = A0.copy() | |
50 | x3 = pixsolve_faster(A.transpose((1, 2, 0)), b.T).T | |
51 | print max(np.linalg.norm(np.dot(A0[i], x3[i]) - b[i]) for i in xrange(n)) | |
52 | ||
53 | ||
54 | %timeit pixsolve(A, b) | |
55 | %timeit pixsolve_faster(A.T, b.T).T | |
56 | %timeit pixsolve_faster(A.transpose((1, 2, 0)), b.T).T | |
57 | ||
58 | ## Results ## | |
59 | 3.47321633195e-14 | |
60 | 4.0549756099e-14 | |
61 | 3.47321633195e-14 | |
62 | 1000 loops, best of 3: 830 us per loop | |
63 | 1000 loops, best of 3: 655 us per loop | |
64 | 1000 loops, best of 3: 688 us per loop |