def _blas_operand(M):
    """Return ``(array, transposed)`` describing how to hand *M* to BLAS.

    BLAS works on Fortran-ordered (column-major) memory.  An F-contiguous
    matrix is passed unchanged.  A C-contiguous matrix is passed as its
    transpose (a free, F-contiguous view) together with a flag asking BLAS
    to transpose it back.  Anything else is copied into F order.
    """
    if M.flags.f_contiguous:
        return M, False
    if M.flags.c_contiguous:
        return M.T, True
    # Strided / sliced input: no zero-copy layout is available.
    return np.asfortranarray(M), False


def dot(A, B, out=None):
    """Matrix product ``A.B`` of two 2-D arrays, computed with BLAS ``gemm``.

    Intended as a drop-in replacement for ``numpy.dot`` on matrices.

    Parameters
    ----------
    A, B : numpy.ndarray
        2-D arrays with ``A.shape[1] == B.shape[0]``.
    out : numpy.ndarray, optional
        Array of shape ``(A.shape[0], B.shape[1])`` that receives the result.
        When it is Fortran-contiguous and already has the dtype of the
        selected BLAS routine, gemm writes into it directly; otherwise the
        result is computed in a temporary and copied into it.

    Returns
    -------
    numpy.ndarray
        ``out`` if it was supplied, otherwise a new Fortran-ordered array
        whose dtype is that of the BLAS routine chosen for ``A`` and ``B``.

    Raises
    ------
    ValueError
        If either operand is not 2-D, if the inner dimensions differ, or if
        ``out`` has the wrong shape.
    """
    import scipy.linalg as linalg

    if A.ndim != 2 or B.ndim != 2:
        raise ValueError("dot requires two 2-D arrays")
    if A.shape[1] != B.shape[0]:
        raise ValueError("matrices are not aligned")

    result_shape = (A.shape[0], B.shape[1])
    if out is not None and out.shape != result_shape:
        raise ValueError("output array has wrong dimensions")

    gemm = linalg.get_blas_funcs('gemm', arrays=(A, B))

    a, trans_a = _blas_operand(A)
    b, trans_b = _blas_operand(B)

    if out is None:
        # Let gemm allocate the (Fortran-ordered) result in its own dtype.
        return gemm(alpha=1., a=a, b=b, trans_a=trans_a, trans_b=trans_b)

    # beta defaults to 0, so the previous contents of `out` are ignored;
    # it is passed only as a candidate output buffer.
    result = gemm(alpha=1., a=a, b=b, trans_a=trans_a, trans_b=trans_b,
                  c=out, overwrite_c=True)

    # gemm can only write in place when `out` is F-contiguous and of the
    # BLAS dtype; otherwise it worked on a private copy that must be
    # transferred back.
    if not np.may_share_memory(result, out):
        np.copyto(out, result)
    return out