Advertisement
Guest User

Untitled

a guest
Nov 18th, 2012
162
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ## file foo.f90 ##
  2. function pixsolve(A, b) result(x)
  3.     implicit none
  4.     real*8    :: A(:,:,:), b(:,:), x(size(b,1),size(b,2))
  5.     integer*4 :: i, n, m, piv(size(b,2)), err
  6.     n = size(A,1)
  7.     m = size(A,2)
  8.     x = b
  9.     do i = 1, n
  10.         call dgesv(m, 1, A(i,:,:), m, piv, x(i,:), m, err)
  11.     end do
  12. end function
  13.  
  14. ## file foo_faster.f90 ##
  15. function pixsolve(A, b) result(x)
  16.     implicit none
  17.     real*8    :: A(:,:,:), b(:,:), x(size(b,1),size(b,2))
  18.     integer*4 :: i, n, m, piv(size(b,1)), err
  19.     n = size(A,3)
  20.     m = size(A,1)
  21.     x = b
  22.     do i = 1, n
  23.         call dgesv(m, 1, A(:,:, i), m, piv, x(:, i), m, err)
  24.     end do
  25. end function
  26.  
  27. ## Run f2py ##
  28. f2py -c -m foo{,.f90} -llapack -lblas
  29. f2py -c -m foo_faster{,.f90} -llapack -lblas
  30.  
  31. ## file test.ipy (to be ran in IPython) ##
  32. import numpy as np
  33. from foo import pixsolve
  34. from foo_faster import pixsolve as pixsolve_faster
  35.  
  36. n = 600
  37. A = np.random.rand(n, 3, 3)
  38. b = np.random.rand(n, 3)
  39. A0 = A.copy()  # Store a copy to check solutions, dgesv modifies the input matrix
  40.  
  41. x1 = pixsolve(A, b)  # slightly faster (5%) if A, b were in Fortran order
  42. print max(np.linalg.norm(np.dot(A0[i], x1[i]) - b[i]) for i in xrange(n))
  43.  
  44. A = A0.copy()
  45. x2 = pixsolve_faster(A.T, b.T).T
  46. # But you're solving for the transpose of each of your matrices!!
  47. print max(np.linalg.norm(np.dot(A0[i].T, x2[i]) - b[i]) for i in xrange(n))
  48.  
  49. A = A0.copy()
  50. x3 = pixsolve_faster(A.transpose((1, 2, 0)), b.T).T
  51. print max(np.linalg.norm(np.dot(A0[i], x3[i]) - b[i]) for i in xrange(n))
  52.  
  53.  
  54. %timeit pixsolve(A, b)
  55. %timeit pixsolve_faster(A.T, b.T).T
  56. %timeit pixsolve_faster(A.transpose((1, 2, 0)), b.T).T
  57.  
  58. ## Results ##
  59. 3.47321633195e-14
  60. 4.0549756099e-14
  61. 3.47321633195e-14
  62. 1000 loops, best of 3: 830 us per loop
  63. 1000 loops, best of 3: 655 us per loop
  64. 1000 loops, best of 3: 688 us per loop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement