Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using Libdl
- using LinearAlgebra # For BlasInt, I, and general linear algebra types
- # --- Define Library Paths ---
- # IMPORTANT: Ensure these paths are correct for your system.
- # Replace with your actual AOCL installation paths.
- const LIBBLIS_ILP64_PATH = "C:\\Program Files\\AMD\\AOCL-Windows\\amd-blis\\lib\\ILP64\\AOCL-LibBlis-Win-MT-dll.dll"
- const LIBBLIS_LP64_PATH = "C:\\Program Files\\AMD\\AOCL-Windows\\amd-blis\\lib\\LP64\\AOCL-LibBlis-Win-MT-dll.dll"
- const LIBFLAME_ILP64_PATH = "C:\\Program Files\\AMD\\AOCL-Windows\\amd-libflame\\lib\\ILP64\\AOCL-LibFlame-Win-MT-dll.dll"
- const LIBFLAME_LP64_PATH = "C:\\Program Files\\AMD\\AOCL-Windows\\amd-libflame\\lib\\LP64\\AOCL-LibFlame-Win-MT-dll.dll"
- # --- Global Library Handles ---
- # These will hold the handles to the loaded DLLs.
- # Using `global` allows them to be modified within `try-catch` blocks.
- global blis_ilp64_handle = C_NULL
- global blis_lp64_handle = C_NULL
- global flame_ilp64_handle = C_NULL
- global flame_lp64_handle = C_NULL
- # Function to load a library safely
- function load_library(path::String)
- handle = C_NULL
- try
- handle = Libdl.dlopen(path)
- println("Successfully loaded library: $path") # Keep this for loading feedback
- catch e
- @error "Could not load library: $path. Please ensure the path is correct and dependencies are met." exception=(e, catch_backtrace())
- end
- return handle
- end
- # Load all necessary libraries at startup
- blis_ilp64_handle = load_library(LIBBLIS_ILP64_PATH)
- blis_lp64_handle = load_library(LIBBLIS_LP64_PATH)
- flame_ilp64_handle = load_library(LIBFLAME_ILP64_PATH)
- flame_lp64_handle = load_library(LIBFLAME_LP64_PATH)
- # --- Helper for BLAS/LAPACK function calls ---
- # This function is a generic dispatcher for ccall.
- # It checks if the library handle is valid before attempting to call the function.
- function call_external_function(lib_handle, func_name::Symbol, ret_type::DataType, arg_types::Tuple, args...)
- if lib_handle === C_NULL
- error("Library for function :$func_name is not loaded.")
- end
- func_ptr = Libdl.dlsym(lib_handle, func_name)
- if func_ptr == C_NULL
- error("Function :$func_name not found in library.")
- end
- return ccall(func_ptr, ret_type, arg_types, args...)
- end
- # --- BLAS Level 1 Functions (Vector-Vector Operations) ---
- # AXPY: y = alpha*x + y
- # s/daxpy (single/double precision)
- function aocl_saxpy!(y::Vector{Float32}, alpha::Float32, x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :saxpy_, Cvoid,
- (Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1))
- return y
- end
- function aocl_daxpy!(y::Vector{Float64}, alpha::Float64, x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :daxpy_, Cvoid,
- (Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1))
- return y
- end
- # DOT: dot product of two vectors
- # s/ddot (single/double precision)
- function aocl_sdot(x::Vector{Float32}, y::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- result = Ref{Float32}(0.0)
- call_external_function(handle, :sdot_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{Float32}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1), result)
- return result[]
- end
- function aocl_ddot(x::Vector{Float64}, y::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- result = Ref{Float64}(0.0)
- call_external_function(handle, :ddot_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{Float64}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1), result)
- return result[]
- end
- # SCAL: x = alpha*x
- # s/dscal (single/double precision)
- function aocl_sscal!(x::Vector{Float32}, alpha::Float32)
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :sscal_, Cvoid,
- (Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), alpha, x, BlasIntType(1))
- return x
- end
- function aocl_dscal!(x::Vector{Float64}, alpha::Float64)
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :dscal_, Cvoid,
- (Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), alpha, x, BlasIntType(1))
- return x
- end
- # SWAP: swap two vectors
- # s/dswap (single/double precision)
- function aocl_sswap!(x::Vector{Float32}, y::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :sswap_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1))
- return x, y
- end
- function aocl_dswap!(x::Vector{Float64}, y::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :dswap_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1))
- return x, y
- end
- # NRM2: Euclidean norm of a vector
- # s/dnrm2 (single/double precision)
- function aocl_snrm2(x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- return call_external_function(handle, :snrm2_, Float32,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- function aocl_dnrm2(x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- return call_external_function(handle, :dnrm2_, Float64,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- # ASUM: sum of absolute values
- # s/dasum (single/double precision)
- function aocl_sasum(x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- return call_external_function(handle, :sasum_, Float32,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- function aocl_dasum(x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- return call_external_function(handle, :dasum_, Float64,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- # IAMIN: index of element with smallest absolute value
- # is/idamin (single/double precision)
- function aocl_isamin(x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- # BLAS functions return 1-based index
- return call_external_function(handle, :isamin_, BlasIntType,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- function aocl_idamin(x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- # BLAS functions return 1-based index
- return call_external_function(handle, :idamin_, BlasIntType,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- # IAMAX: index of element with largest absolute value
- # is/idmax (single/double precision)
- function aocl_isamax(x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- # BLAS functions return 1-based index
- return call_external_function(handle, :isamax_, BlasIntType,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- function aocl_idmax(x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- # BLAS functions return 1-based index
- return call_external_function(handle, :idmax_, BlasIntType,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1))
- end
- # ROTG: Generate Givens rotation
- # s/drotg (single/double precision)
- function aocl_srotg!(a::Ref{Float32}, b::Ref{Float32}, c::Ref{Float32}, s::Ref{Float32})
- handle = blis_lp64_handle # ROTG is typically LP64
- call_external_function(handle, :srotg_, Cvoid,
- (Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}),
- a, b, c, s)
- return a[], b[], c[], s[]
- end
- function aocl_drotg!(a::Ref{Float64}, b::Ref{Float64}, c::Ref{Float64}, s::Ref{Float64})
- handle = blis_ilp64_handle # ROTG is typically LP64, but using ILP64 handle for consistency
- call_external_function(handle, :drotg_, Cvoid,
- (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}),
- a, b, c, s)
- return a[], b[], c[], s[]
- end
- # ROTMG: Generate modified Givens rotation matrix
- # s/drotmg (single/double precision)
- function aocl_srotmg!(d1::Ref{Float32}, d2::Ref{Float32}, x1::Ref{Float32}, y1::Float32, param::Vector{Float32})
- handle = blis_lp64_handle # ROTMG is typically LP64
- call_external_function(handle, :srotmg_, Cvoid,
- (Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{Float32}, Ptr{Float32}),
- d1, d2, x1, y1, param)
- return d1[], d2[], x1[], y1, param
- end
- function aocl_drotmg!(d1::Ref{Float64}, d2::Ref{Float64}, x1::Ref{Float64}, y1::Float64, param::Vector{Float64})
- handle = blis_ilp64_handle # ROTMG is typically LP64, but using ILP64 handle for consistency
- call_external_function(handle, :drotmg_, Cvoid,
- (Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{Float64}, Ptr{Float64}),
- d1, d2, x1, y1, param)
- return d1[], d2[], x1[], y1, param
- end
- # ROT: Apply Givens rotation
- # s/drot (single/double precision)
- function aocl_srot!(x::Vector{Float32}, y::Vector{Float32}, c::Float32, s::Float32)
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :srot_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{Float32}, Ref{Float32}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1), c, s)
- return x, y
- end
- function aocl_drot!(x::Vector{Float64}, y::Vector{Float64}, c::Float64, s::Float64)
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :drot_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{Float64}, Ref{Float64}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1), c, s)
- return x, y
- end
- # COPY: copy one vector to another
- # s/dcopy (single/double precision)
- function aocl_scopy!(y::Vector{Float32}, x::Vector{Float32})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :scopy_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1))
- return y
- end
- function aocl_dcopy!(y::Vector{Float64}, x::Vector{Float64})
- n = length(x)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- call_external_function(handle, :dcopy_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(n), x, BlasIntType(1), y, BlasIntType(1))
- return y
- end
- # --- BLAS Level 2 Functions (Matrix-Vector Operations) ---
- # GEMV: y = alpha*A*x + beta*y
- # s/dgemv (single/double precision)
- function aocl_sgemv!(trans::Char, A::Matrix{Float32}, x::Vector{Float32}, y::Vector{Float32}, alpha::Float32, beta::Float32)
- m, n = size(A)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :sgemv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), alpha, A, lda, x, BlasIntType(1), beta, y, BlasIntType(1))
- return y
- end
- function aocl_dgemv!(trans::Char, A::Matrix{Float64}, x::Vector{Float64}, y::Vector{Float64}, alpha::Float64, beta::Float64)
- m, n = size(A)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dgemv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), alpha, A, lda, x, BlasIntType(1), beta, y, BlasIntType(1))
- return y
- end
- # GER: A = alpha*x*y' + A (Rank-1 update)
- # s/dger (single/double precision)
- function aocl_sger!(A::Matrix{Float32}, alpha::Float32, x::Vector{Float32}, y::Vector{Float32})
- m, n = size(A)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :sger_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1), A, lda)
- return A
- end
- function aocl_dger!(A::Matrix{Float64}, alpha::Float64, x::Vector{Float64}, y::Vector{Float64})
- m, n = size(A)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dger_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1), A, lda)
- return A
- end
- # TRMV: x = A*x or x = A'*x (Triangular Matrix-Vector Multiply)
- # s/dtrmv (single/double precision)
- function aocl_strmv!(uplo::Char, trans::Char, diag::Char, A::Matrix{Float32}, x::Vector{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :strmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), A, lda, x, BlasIntType(1))
- return x
- end
- function aocl_dtrmv!(uplo::Char, trans::Char, diag::Char, A::Matrix{Float64}, x::Vector{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dtrmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), A, lda, x, BlasIntType(1))
- return x
- end
- # TRSV: Solve A*x = b or A'*x = b (Triangular Solve)
- # s/dtrsv (single/double precision)
- function aocl_strsv!(uplo::Char, trans::Char, diag::Char, A::Matrix{Float32}, x::Vector{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :strsv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), A, lda, x, BlasIntType(1))
- return x
- end
- function aocl_dtrsv!(uplo::Char, trans::Char, diag::Char, A::Matrix{Float64}, x::Vector{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dtrsv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), A, lda, x, BlasIntType(1))
- return x
- end
- # SYMV: y = alpha*A*x + beta*y (Symmetric Matrix-Vector Multiply)
- # s/dsymv (single/double precision)
- function aocl_ssymv!(uplo::Char, A::Matrix{Float32}, x::Vector{Float32}, y::Vector{Float32}, alpha::Float32, beta::Float32)
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :ssymv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, A, lda, x, BlasIntType(1), beta, y, BlasIntType(1))
- return y
- end
- function aocl_dsymv!(uplo::Char, A::Matrix{Float64}, x::Vector{Float64}, y::Vector{Float64}, alpha::Float64, beta::Float64)
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dsymv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, A, lda, x, BlasIntType(1), beta, y, BlasIntType(1))
- return y
- end
- # SYR: A = alpha*x*x' + A (Rank-1 update of a symmetric matrix)
- # s/dsyr (single/double precision)
- function aocl_ssyr!(uplo::Char, A::Matrix{Float32}, alpha::Float32, x::Vector{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :ssyr_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, x, BlasIntType(1), A, lda)
- return A
- end
- function aocl_dsyr!(uplo::Char, A::Matrix{Float64}, alpha::Float64, x::Vector{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dsyr_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, x, BlasIntType(1), A, lda)
- return A
- end
- # SYR2: A = alpha*x*y' + alpha*y*x' + A (Rank-2 update of a symmetric matrix)
- # s/dsyr2 (single/double precision)
- function aocl_ssyr2!(uplo::Char, A::Matrix{Float32}, alpha::Float32, x::Vector{Float32}, y::Vector{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :ssyr2_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1), A, lda)
- return A
- end
- function aocl_dsyr2!(uplo::Char, A::Matrix{Float64}, alpha::Float64, x::Vector{Float64}, y::Vector{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- call_external_function(handle, :dsyr2_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, x, BlasIntType(1), y, BlasIntType(1), A, lda)
- return A
- end
- # GBMV: y = alpha*A*x + beta*y (General Band Matrix-Vector product)
- # s/dgbmv (single/double precision)
- function aocl_sgbmv!(trans::Char, m::Int, n::Int, kl::Int, ku::Int, alpha::Float32, A::Matrix{Float32}, x::Vector{Float32}, beta::Float32, y::Vector{Float32})
- handle = max(m, n) > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for banded matrix is KL + KU + 1
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :sgbmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(kl), BlasIntType(ku), alpha,
- A, lda, x, incx, beta, y, incy)
- return y
- end
- function aocl_dgbmv!(trans::Char, m::Int, n::Int, kl::Int, ku::Int, alpha::Float64, A::Matrix{Float64}, x::Vector{Float64}, beta::Float64, y::Vector{Float64})
- handle = max(m, n) > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for banded matrix is KL + KU + 1
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :dgbmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(kl), BlasIntType(ku), alpha,
- A, lda, x, incx, beta, y, incy)
- return y
- end
- # SBMV: y = alpha*A*x + beta*y (Symmetric Band Matrix-Vector product)
- # s/dsbmv (single/double precision)
- function aocl_ssbmv!(uplo::Char, n::Int, k::Int, alpha::Float32, A::Matrix{Float32}, x::Vector{Float32}, beta::Float32, y::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for symmetric band matrix is K + 1
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :ssbmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(k), alpha, A, lda, x, incx, beta, y, incy)
- return y
- end
- function aocl_dsbmv!(uplo::Char, n::Int, k::Int, alpha::Float64, A::Matrix{Float64}, x::Vector{Float64}, beta::Float64, y::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for symmetric band matrix is K + 1
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :dsbmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(k), alpha, A, lda, x, incx, beta, y, incy)
- return y
- end
- # SPMV: y = alpha*A*x + beta*y (Symmetric Packed Matrix-Vector product)
- # s/dspmv (single/double precision)
- function aocl_sspmv!(uplo::Char, n::Int, alpha::Float32, AP::Vector{Float32}, x::Vector{Float32}, beta::Float32, y::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :sspmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, AP, x, incx, beta, y, incy)
- return y
- end
- function aocl_dspmv!(uplo::Char, n::Int, alpha::Float64, AP::Vector{Float64}, x::Vector{Float64}, beta::Float64, y::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :dspmv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(n), alpha, AP, x, incx, beta, y, incy)
- return y
- end
- # TBMV: x = A*x or x = A'*x (Triangular Band Matrix-Vector Multiply)
- # s/dtbmv (single/double precision)
- function aocl_stbmv!(uplo::Char, trans::Char, diag::Char, n::Int, k::Int, A::Matrix{Float32}, x::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for triangular band matrix is K + 1
- incx = BlasIntType(1)
- call_external_function(handle, :stbmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), BlasIntType(k), A, lda, x, incx)
- return x
- end
- function aocl_dtbmv!(uplo::Char, trans::Char, diag::Char, n::Int, k::Int, A::Matrix{Float64}, x::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for triangular band matrix is K + 1
- incx = BlasIntType(1)
- call_external_function(handle, :dtbmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), BlasIntType(k), A, lda, x, incx)
- return x
- end
- # TPMV: x = A*x or x = A'*x (Triangular Packed Matrix-Vector Multiply)
- # s/dtpmv (single/double precision)
- function aocl_stpmv!(uplo::Char, trans::Char, diag::Char, n::Int, AP::Vector{Float32}, x::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :stpmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), AP, x, incx)
- return x
- end
- function aocl_dtpmv!(uplo::Char, trans::Char, diag::Char, n::Int, AP::Vector{Float64}, x::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :dtpmv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), AP, x, incx)
- return x
- end
- # TPSV: Solve A*x = b or A'*x = b (Triangular Packed Solve)
- # s/dtpsv (single/double precision)
- function aocl_stpsv!(uplo::Char, trans::Char, diag::Char, n::Int, AP::Vector{Float32}, x::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :stpsv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), AP, x, incx)
- return x
- end
- function aocl_dtpsv!(uplo::Char, trans::Char, diag::Char, n::Int, AP::Vector{Float64}, x::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :dtpsv_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, diag, BlasIntType(n), AP, x, incx)
- return x
- end
- # SPR: A = alpha*x*x' + A (Rank-1 update of a symmetric packed matrix)
- # s/dspr (single/double precision)
- function aocl_sspr!(uplo::Char, n::Int, alpha::Float32, x::Vector{Float32}, AP::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :sspr_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}),
- uplo, BlasIntType(n), alpha, x, incx, AP)
- return AP
- end
- function aocl_dspr!(uplo::Char, n::Int, alpha::Float64, x::Vector{Float64}, AP::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- call_external_function(handle, :dspr_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}),
- uplo, BlasIntType(n), alpha, x, incx, AP)
- return AP
- end
- # SPR2: A = alpha*x*y' + alpha*y*x' + A (Rank-2 update of a symmetric packed matrix)
- # s/dspr2 (single/double precision)
- function aocl_sspr2!(uplo::Char, n::Int, alpha::Float32, x::Vector{Float32}, y::Vector{Float32}, AP::Vector{Float32})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :sspr2_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}),
- uplo, BlasIntType(n), alpha, x, incx, y, incy, AP)
- return AP
- end
- function aocl_dspr2!(uplo::Char, n::Int, alpha::Float64, x::Vector{Float64}, y::Vector{Float64}, AP::Vector{Float64})
- handle = n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- incx = BlasIntType(1)
- incy = BlasIntType(1)
- call_external_function(handle, :dspr2_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}),
- uplo, BlasIntType(n), alpha, x, incx, y, incy, AP)
- return AP
- end
- # --- BLAS Level 3 Functions (Matrix-Matrix Operations) ---
- # GEMM: C = alpha*A*B + beta*C
- # s/dgemm (single/double precision)
- function aocl_sgemm!(transa::Char, transb::Char, C::Matrix{Float32}, A::Matrix{Float32}, B::Matrix{Float32}, alpha::Float32, beta::Float32)
- M, N_ = size(C)
- K = (transa == 'N' || transa == 'n') ? size(A, 2) : size(A, 1)
- handle = M > typemax(Int32) || N_ > typemax(Int32) || K > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = M > typemax(Int32) || N_ > typemax(Int32) || K > typemax(Int32) ? Int64 : Int32
- m = BlasIntType(M)
- n = BlasIntType(N_)
- k = BlasIntType(K)
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :sgemm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- transa, transb, m, n, k,
- alpha, A, lda, B, ldb,
- beta, C, ldc)
- return C
- end
- function aocl_dgemm!(transa::Char, transb::Char, C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}, alpha::Float64, beta::Float64)
- M, N_ = size(C)
- K = (transa == 'N' || transa == 'n') ? size(A, 2) : size(A, 1)
- handle = M > typemax(Int32) || N_ > typemax(Int32) || K > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = M > typemax(Int32) || N_ > typemax(Int32) || K > typemax(Int32) ? Int64 : Int32
- m = BlasIntType(M)
- n = BlasIntType(N_)
- k = BlasIntType(K)
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :dgemm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- transa, transb, m, n, k,
- alpha, A, lda, B, ldb,
- beta, C, ldc)
- return C
- end
- # TRMM: B = alpha*A*B or B = alpha*B*A (Triangular Matrix-Matrix Multiply)
- # s/dtrmm (single/double precision)
- function aocl_strmm!(side::Char, uplo::Char, transa::Char, diag::Char, B::Matrix{Float32}, A::Matrix{Float32}, alpha::Float32)
- m, n = size(B)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- call_external_function(handle, :strmm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- side, uplo, transa, diag, BlasIntType(m), BlasIntType(n),
- alpha, A, lda, B, ldb)
- return B
- end
- function aocl_dtrmm!(side::Char, uplo::Char, transa::Char, diag::Char, B::Matrix{Float64}, A::Matrix{Float64}, alpha::Float64)
- m, n = size(B)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- call_external_function(handle, :dtrmm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- side, uplo, transa, diag, BlasIntType(m), BlasIntType(n),
- alpha, A, lda, B, ldb)
- return B
- end
- # TRSM: Solve A*X = B or X*A = B (Triangular Solve for multiple right-hand sides)
- # s/dtrsm (single/double precision)
- function aocl_strsm!(side::Char, uplo::Char, transa::Char, diag::Char, B::Matrix{Float32}, A::Matrix{Float32}, alpha::Float32)
- m, n = size(B)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- call_external_function(handle, :strsm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}),
- side, uplo, transa, diag, BlasIntType(m), BlasIntType(n),
- alpha, A, lda, B, ldb)
- return B
- end
- function aocl_dtrsm!(side::Char, uplo::Char, transa::Char, diag::Char, B::Matrix{Float64}, A::Matrix{Float64}, alpha::Float64)
- m, n = size(B)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- call_external_function(handle, :dtrsm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}),
- side, uplo, transa, diag, BlasIntType(m), BlasIntType(n),
- alpha, A, lda, B, ldb)
- return B
- end
- # SYMM: C = alpha*A*B + beta*C (Symmetric Matrix-Matrix Multiply)
- # s/dsymm (single/double precision)
- function aocl_ssymm!(side::Char, uplo::Char, C::Matrix{Float32}, A::Matrix{Float32}, B::Matrix{Float32}, alpha::Float32, beta::Float32)
- m, n = size(C)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :ssymm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- side, uplo, BlasIntType(m), BlasIntType(n), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- function aocl_dsymm!(side::Char, uplo::Char, C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}, alpha::Float64, beta::Float64)
- m, n = size(C)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :dsymm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- side, uplo, BlasIntType(m), BlasIntType(n), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- # SYRK: C = alpha*A*A' + beta*C (Symmetric Rank-k update)
- # s/dsyrk (single/double precision)
- function aocl_ssyrk!(uplo::Char, trans::Char, C::Matrix{Float32}, A::Matrix{Float32}, alpha::Float32, beta::Float32)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :ssyrk_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, beta, C, ldc)
- return C
- end
- function aocl_dsyrk!(uplo::Char, trans::Char, C::Matrix{Float64}, A::Matrix{Float64}, alpha::Float64, beta::Float64)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :dsyrk_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, beta, C, ldc)
- return C
- end
- # SYR2K: C = alpha*A*B' + alpha*B*A' + beta*C (Symmetric Rank-2k update)
- # s/dsyr2k (single/double precision)
- function aocl_ssyr2k!(uplo::Char, trans::Char, C::Matrix{Float32}, A::Matrix{Float32}, B::Matrix{Float32}, alpha::Float32, beta::Float32)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :ssyr2k_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{Float32}, Ptr{Float32}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- function aocl_dsyr2k!(uplo::Char, trans::Char, C::Matrix{Float64}, A::Matrix{Float64}, B::Matrix{Float64}, alpha::Float64, beta::Float64)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :dsyr2k_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{Float64}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- # HEMM: C = alpha*A*B + beta*C (Hermitian Matrix-Matrix Multiply)
- # zhemm (double precision complex)
- function aocl_zhemm!(side::Char, uplo::Char, C::Matrix{ComplexF64}, A::Matrix{ComplexF64}, B::Matrix{ComplexF64}, alpha::ComplexF64, beta::ComplexF64)
- m, n = size(C)
- handle = m > typemax(Int32) || n > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :zhemm_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{ComplexF64},
- Ptr{ComplexF64}, Ref{BlasIntType}, Ptr{ComplexF64}, Ref{BlasIntType},
- Ref{ComplexF64}, Ptr{ComplexF64}, Ref{BlasIntType}),
- side, uplo, BlasIntType(m), BlasIntType(n), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- # HERK: C = alpha*A*A' + beta*C (Hermitian Rank-k update)
- # zherk (double precision complex)
- function aocl_zherk!(uplo::Char, trans::Char, C::Matrix{ComplexF64}, A::Matrix{ComplexF64}, alpha::Float64, beta::Float64)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :zherk_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64},
- Ptr{ComplexF64}, Ref{BlasIntType}, Ref{Float64}, Ptr{ComplexF64}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, beta, C, ldc)
- return C
- end
- # HER2K: C = alpha*A*B' + alpha*B*A' + beta*C (Hermitian Rank-2k update)
- # zher2k (double precision complex)
- function aocl_zher2k!(uplo::Char, trans::Char, C::Matrix{ComplexF64}, A::Matrix{ComplexF64}, B::Matrix{ComplexF64}, alpha::ComplexF64, beta::Float64)
- n = size(C, 1)
- k = (trans == 'N' || trans == 'n') ? size(A, 2) : size(A, 1)
- handle = n > typemax(Int32) || k > typemax(Int32) ? blis_ilp64_handle : blis_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(max(1, stride(A,2)))
- ldb = BlasIntType(max(1, stride(B,2)))
- ldc = BlasIntType(max(1, stride(C,2)))
- call_external_function(handle, :zher2k_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{ComplexF64},
- Ptr{ComplexF64}, Ref{BlasIntType}, Ptr{ComplexF64}, Ref{BlasIntType},
- Ref{Float64}, Ptr{ComplexF64}, Ref{BlasIntType}),
- uplo, trans, BlasIntType(n), BlasIntType(k), alpha, A, lda, B, ldb, beta, C, ldc)
- return C
- end
- # --- LAPACK Functions ---
- # GESV: Solve linear system of equations AX = B
- # s/dgesv (single/double precision)
- function aocl_sgesv!(A::Matrix{Float32}, B::Matrix{Float32})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- ipiv = Vector{Int32}(undef, n) # IPIV is typically Int32 even for ILP64
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgesv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Int32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, info)
- return A, B, ipiv, info[]
- end
- function aocl_dgesv!(A::Matrix{Float64}, B::Matrix{Float64})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- ipiv = Vector{Int32}(undef, n) # IPIV is typically Int32 even for ILP64
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgesv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Int32}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, info)
- return A, B, ipiv, info[]
- end
- # GETRF: LU factorization
- # s/dgetrf (single/double precision)
- function aocl_sgetrf!(A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ipiv = Vector{Int32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgetrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Int32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, ipiv, info)
- return A, ipiv, info[]
- end
- function aocl_dgetrf!(A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ipiv = Vector{Int32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgetrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Int32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, ipiv, info)
- return A, ipiv, info[]
- end
- # GETRI: Matrix inversion from LU factorization
- # s/dgetri (single/double precision)
- function aocl_sgetri!(A::Matrix{Float32}, ipiv::Vector{Int32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Placeholder, will be resized by LAPACK if LWORK=-1
- lwork = BlasIntType(-1) # Query optimal workspace size
- call_external_function(handle, :sgetri_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Int32},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), A, lda, ipiv, work, lwork, info)
- if info[] == 0 && lwork[] == -1 # Check if query was successful
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgetri_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Int32},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), A, lda, ipiv, work, lwork, info)
- end
- return A, info[]
- end
- function aocl_dgetri!(A::Matrix{Float64}, ipiv::Vector{Int32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Placeholder
- lwork = BlasIntType(-1) # Query optimal workspace size
- call_external_function(handle, :dgetri_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Int32},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), A, lda, ipiv, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgetri_, Cvoid,
- (Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Int32},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), A, lda, ipiv, work, lwork, info)
- end
- return A, info[]
- end
- # GETRS: Solve system of linear equations using LU factorization
- # s/dgetrs (single/double precision)
- function aocl_sgetrs!(trans::Char, A::Matrix{Float32}, B::Matrix{Float32}, ipiv::Vector{Int32})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgetrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Int32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, info)
- return B, info[]
- end
- function aocl_dgetrs!(trans::Char, A::Matrix{Float64}, B::Matrix{Float64}, ipiv::Vector{Int32})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgetrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Int32}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, info)
- return B, info[]
- end
- # GEQRF: QR factorization
- # s/dgeqrf (single/double precision)
- function aocl_sgeqrf!(A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- tau = Vector{Float32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sgeqrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, tau, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgeqrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, tau, work, lwork, info)
- end
- return A, tau, info[]
- end
- function aocl_dgeqrf!(A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- tau = Vector{Float64}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dgeqrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, tau, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgeqrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, tau, work, lwork, info)
- end
- return A, tau, info[]
- end
- # SYEV: Eigenvalues and (optionally) eigenvectors of a real symmetric matrix
- # s/dsyev (single/double precision)
- function aocl_ssyev!(jobz::Char, uplo::Char, A::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- w = Vector{Float32}(undef, n) # Eigenvalues
- info = Ref{BlasIntType}(0)
- # Query optimal workspace size
- work = Vector{Float32}(undef, 1)
- lwork = BlasIntType(-1)
- call_external_function(handle, :ssyev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobz, uplo, BlasIntType(n), A, lda, w, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :ssyev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobz, uplo, BlasIntType(n), A, lda, w, work, lwork, info)
- end
- return A, w, info[] # A contains eigenvectors if jobz='V'
- end
- function aocl_dsyev!(jobz::Char, uplo::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- w = Vector{Float64}(undef, n) # Eigenvalues
- info = Ref{BlasIntType}(0)
- # Query optimal workspace size
- work = Vector{Float64}(undef, 1)
- lwork = BlasIntType(-1)
- call_external_function(handle, :dsyev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobz, uplo, BlasIntType(n), A, lda, w, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dsyev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobz, uplo, BlasIntType(n), A, lda, w, work, lwork, info)
- end
- return A, w, info[] # A contains eigenvectors if jobz='V'
- end
- # POTRF: Cholesky factorization of a symmetric positive definite matrix
- # s/dpotrf (single/double precision)
- function aocl_spotrf!(uplo::Char, A::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :spotrf_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, info)
- return A, info[]
- end
- function aocl_dpotrf!(uplo::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dpotrf_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, info)
- return A, info[]
- end
- # POTRS: Solve A*X = B for symmetric positive definite A using Cholesky factorization
- # s/dpotrs (single/double precision)
- function aocl_spotrs!(uplo::Char, A::Matrix{Float32}, B::Matrix{Float32})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :spotrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb, info)
- return B, info[]
- end
- function aocl_dpotrs!(uplo::Char, A::Matrix{Float64}, B::Matrix{Float64})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dpotrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb, info)
- return B, info[]
- end
- # POTRI: Compute the inverse of a symmetric positive definite matrix using Cholesky factorization
- # s/dpotri (single/double precision)
- function aocl_spotri!(uplo::Char, A::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :spotri_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, info)
- return A, info[]
- end
- function aocl_dpotri!(uplo::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dpotri_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, info)
- return A, info[]
- end
- # ORGQR: Generate Q from QR factorization
- # s/dorgqr (single/double precision)
- function aocl_sorgqr!(A::Matrix{Float32}, tau::Vector{Float32})
- m, n = size(A)
- k = length(tau)
- handle = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sorgqr_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(k), A, lda, tau, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sorgqr_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(k), A, lda, tau, work, lwork, info)
- end
- return A, info[]
- end
- function aocl_dorgqr!(A::Matrix{Float64}, tau::Vector{Float64})
- m, n = size(A)
- k = length(tau)
- handle = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dorgqr_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(k), A, lda, tau, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dorgqr_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(k), A, lda, tau, work, lwork, info)
- end
- return A, info[]
- end
- # UNMQR: Apply Q from QR factorization to a matrix
- # s/dunmqr (single/double precision)
- function aocl_sunmqr!(side::Char, trans::Char, C::Matrix{Float32}, A::Matrix{Float32}, tau::Vector{Float32})
- m, n = size(C)
- k = length(tau)
- handle = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1))
- ldc = BlasIntType(m)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sunmqr_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- side, trans, BlasIntType(m), BlasIntType(n), BlasIntType(k),
- A, lda, tau, C, ldc, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sunmqr_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- side, trans, BlasIntType(m), BlasIntType(n), BlasIntType(k),
- A, lda, tau, C, ldc, work, lwork, info)
- end
- return C, info[]
- end
- function aocl_dunmqr!(side::Char, trans::Char, C::Matrix{Float64}, A::Matrix{Float64}, tau::Vector{Float64})
- m, n = size(C)
- k = length(tau)
- handle = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) || k > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1))
- ldc = BlasIntType(m)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dunmqr_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- side, trans, BlasIntType(m), BlasIntType(n), BlasIntType(k),
- A, lda, tau, C, ldc, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dunmqr_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- side, trans, BlasIntType(m), BlasIntType(n), BlasIntType(k),
- A, lda, tau, C, ldc, work, lwork, info)
- end
- return C, info[]
- end
- # GESVD: Singular Value Decomposition
- # s/dgesvd (single/double precision)
- function aocl_sgesvd!(jobu::Char, jobvt::Char, A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- s = Vector{Float32}(undef, min(m, n)) # Singular values
- # Determine dimensions for U and VT based on jobu/jobvt
- ldu = BlasIntType(1)
- if jobu == 'A' || jobu == 'a'
- ldu = BlasIntType(m)
- elseif jobu == 'S' || jobu == 's'
- ldu = BlasIntType(m)
- end
- U = Matrix{Float32}(undef, ldu, (jobu == 'A' || jobu == 'a') ? m : (jobu == 'S' || jobu == 's' ? min(m, n) : 1))
- ldvt = BlasIntType(1)
- if jobvt == 'A' || jobvt == 'a'
- ldvt = BlasIntType(n)
- elseif jobvt == 'O' || jobvt == 'o'
- ldvt = BlasIntType(min(m, n))
- end
- VT = Matrix{Float32}(undef, ldvt, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sgesvd_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobu, jobvt, BlasIntType(m), BlasIntType(n),
- A, lda, s, U, ldu, VT, ldvt, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgesvd_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobu, jobvt, BlasIntType(m), BlasIntType(n),
- A, lda, s, U, ldu, VT, ldvt, work, lwork, info)
- end
- return U, s, VT, info[]
- end
- function aocl_dgesvd!(jobu::Char, jobvt::Char, A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- s = Vector{Float64}(undef, min(m, n)) # Singular values
- ldu = BlasIntType(1)
- if jobu == 'A' || jobu == 'a'
- ldu = BlasIntType(m)
- elseif jobu == 'S' || jobu == 's'
- ldu = BlasIntType(m)
- end
- U = Matrix{Float64}(undef, ldu, (jobu == 'A' || jobu == 'a') ? m : (jobu == 'S' || jobu == 's' ? min(m, n) : 1))
- ldvt = BlasIntType(1)
- if jobvt == 'A' || jobvt == 'a'
- ldvt = BlasIntType(n)
- elseif jobvt == 'O' || jobvt == 'o'
- ldvt = BlasIntType(min(m, n))
- end
- VT = Matrix{Float64}(undef, ldvt, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dgesvd_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobu, jobvt, BlasIntType(m), BlasIntType(n),
- A, lda, s, U, ldu, VT, ldvt, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgesvd_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobu, jobvt, BlasIntType(m), BlasIntType(n),
- A, lda, s, U, ldu, VT, ldvt, work, lwork, info)
- end
- return U, s, VT, info[]
- end
- # LASET: Initializes a matrix to zero or to a constant
- # s/dlaset (single/double precision)
- function aocl_slaset!(uplo::Char, m::Int, n::Int, alpha::Float32, beta::Float32, A::Matrix{Float32})
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1))
- call_external_function(handle, :slaset_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float32}, Ref{Float32},
- Ptr{Float32}, Ref{BlasIntType}),
- uplo, BlasIntType(m), BlasIntType(n), alpha, beta, A, lda)
- return A
- end
- function aocl_dlaset!(uplo::Char, m::Int, n::Int, alpha::Float64, beta::Float64, A::Matrix{Float64})
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1))
- call_external_function(handle, :dlaset_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{Float64}, Ref{Float64},
- Ptr{Float64}, Ref{BlasIntType}),
- uplo, BlasIntType(m), BlasIntType(n), alpha, beta, A, lda)
- return A
- end
- # GEEV: Compute eigenvalues and, optionally, eigenvectors of a general matrix
- # s/dgeev (single/double precision)
- function aocl_sgeev!(jobvl::Char, jobvr::Char, A::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- wr = Vector{Float32}(undef, n)
- wi = Vector{Float32}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float32}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float32}(undef, ldvr, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sgeev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgeev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr, work, lwork, info)
- end
- return wr, wi, VL, VR, info[]
- end
- function aocl_dgeev!(jobvl::Char, jobvr::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- wr = Vector{Float64}(undef, n)
- wi = Vector{Float64}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float64}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float64}(undef, ldvr, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dgeev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgeev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr, work, lwork, info)
- end
- return wr, wi, VL, VR, info[]
- end
- # GELSS: Least Squares solution using SVD
- # s/dgelss (single/double precision)
- function aocl_sgelss!(A::Matrix{Float32}, B::Matrix{Float32}, rcond::Float32)
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n))
- s = Vector{Float32}(undef, min(m, n)) # Singular values
- rank = Ref{BlasIntType}(0)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sgelss_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgelss_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, info)
- end
- return B, s, rank[], info[]
- end
- function aocl_dgelss!(A::Matrix{Float64}, B::Matrix{Float64}, rcond::Float64)
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n))
- s = Vector{Float64}(undef, min(m, n)) # Singular values
- rank = Ref{BlasIntType}(0)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dgelss_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgelss_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, info)
- end
- return B, s, rank[], info[]
- end
- # GELSD: Least Squares solution using SVD with divide and conquer
- # s/dgelsd (single/double precision)
- function aocl_sgelsd!(A::Matrix{Float32}, B::Matrix{Float32}, rcond::Float32)
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n))
- s = Vector{Float32}(undef, min(m, n)) # Singular values
- rank = Ref{BlasIntType}(0)
- info = Ref{BlasIntType}(0)
- # Query optimal workspace sizes
- work = Vector{Float32}(undef, 1)
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1) # IWORK size depends on M, N, NRHS
- call_external_function(handle, :sgelsd_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- # The optimal IWORK size is not returned by query, it's typically fixed or derived.
- # For GELSD, IWORK size is typically max(1, 3*MIN(M,N)*SMALSIZ + 11*MIN(M,N)) where SMALSIZ is a small constant.
- # For simplicity, we'll use a heuristic or a sufficiently large size if not querying.
- # For a proper wrapper, one might need to calculate this based on LAPACK's formula.
- # For now, we'll assume a reasonable fixed size or rely on the query for LWORK.
- # Since IWORK is not queried for size with LWORK=-1, we'll use a common safe upper bound.
- iwork_size = max(1, 3 * min(m,n) * 10 + 11 * min(m,n)) # Heuristic based on common LAPACK impl.
- iwork = Vector{BlasIntType}(undef, iwork_size)
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgelsd_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, iwork, info)
- end
- return B, s, rank[], info[]
- end
- function aocl_dgelsd!(A::Matrix{Float64}, B::Matrix{Float64}, rcond::Float64)
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n))
- s = Vector{Float64}(undef, min(m, n)) # Singular values
- rank = Ref{BlasIntType}(0)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1)
- call_external_function(handle, :dgelsd_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- iwork_size = max(1, 3 * min(m,n) * 10 + 11 * min(m,n)) # Heuristic
- iwork = Vector{BlasIntType}(undef, iwork_size)
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgelsd_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- s, rcond, rank, work, lwork, iwork, info)
- end
- return B, s, rank[], info[]
- end
- # GELS: Least Squares solution using QR or LU factorization
- # s/dgels (single/double precision)
- function aocl_sgels!(trans::Char, A::Matrix{Float32}, B::Matrix{Float32})
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n)) # LDB must be at least max(M,N) for GELS
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sgels_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sgels_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- work, lwork, info)
- end
- return A, B, info[]
- end
- function aocl_dgels!(trans::Char, A::Matrix{Float64}, B::Matrix{Float64})
- m, n = size(A)
- nrhs = size(B, 2)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ldb = BlasIntType(max(m, n)) # LDB must be at least max(M,N) for GELS
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dgels_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dgels_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(m), BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb,
- work, lwork, info)
- end
- return A, B, info[]
- end
- # GEEQU: Estimate the condition number of a general matrix
- # s/dgeequ (single/double precision)
- function aocl_sgeequ!(A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- r = Vector{Float32}(undef, m)
- c = Vector{Float32}(undef, n)
- row_cond = Ref{Float32}(0.0)
- col_cond = Ref{Float32}(0.0)
- amax = Ref{Float32}(0.0)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgeequ_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{Float32}, Ref{Float32}, Ref{Float32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, r, c, row_cond, col_cond, amax, info)
- return r, c, row_cond[], col_cond[], amax[], info[]
- end
- function aocl_dgeequ!(A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- r = Vector{Float64}(undef, m)
- c = Vector{Float64}(undef, n)
- row_cond = Ref{Float64}(0.0)
- col_cond = Ref{Float64}(0.0)
- amax = Ref{Float64}(0.0)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgeequ_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{Float64}, Ref{Float64}, Ref{Float64}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, r, c, row_cond, col_cond, amax, info)
- return r, c, row_cond[], col_cond[], amax[], info[]
- end
- # GECON: Estimate the reciprocal of the condition number of a general matrix
- # s/dgecon (single/double precision)
- function aocl_sgecon!(norm_type::Char, A::Matrix{Float32}, anorm::Float32)
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- rcond = Ref{Float32}(0.0)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 4*n) # Workspace size
- iwork = Vector{BlasIntType}(undef, n) # Integer workspace
- call_external_function(handle, :sgecon_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType}, Ref{Float32},
- Ref{Float32}, Ptr{Float32}, Ptr{BlasIntType}, Ref{BlasIntType}),
- norm_type, BlasIntType(n), A, lda, anorm, rcond, work, iwork, info)
- return rcond[], info[]
- end
- function aocl_dgecon!(norm_type::Char, A::Matrix{Float64}, anorm::Float64)
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- rcond = Ref{Float64}(0.0)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 4*n) # Workspace size
- iwork = Vector{BlasIntType}(undef, n) # Integer workspace
- call_external_function(handle, :dgecon_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{Float64},
- Ref{Float64}, Ptr{Float64}, Ptr{BlasIntType}, Ref{BlasIntType}),
- norm_type, BlasIntType(n), A, lda, anorm, rcond, work, iwork, info)
- return rcond[], info[]
- end
- # GESDD: Singular Value Decomposition using divide and conquer
- # s/dgesdd (single/double precision)
- function aocl_sgesdd!(jobz::Char, A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- s = Vector{Float32}(undef, min(m, n)) # Singular values
- # Determine dimensions for U and VT based on jobz
- ldu = BlasIntType(1)
- if jobz == 'A' || jobz == 'a'
- ldu = BlasIntType(m)
- elseif jobz == 'S' || jobz == 's' || jobz == 'O' || jobz == 'o'
- ldu = BlasIntType(m)
- end
- U = Matrix{Float32}(undef, ldu, (jobz == 'A' || jobz == 'a') ? m : (jobz == 'S' || jobz == 's' ? min(m, n) : 1))
- ldvt = BlasIntType(1)
- if jobz == 'A' || jobz == 'a'
- ldvt = BlasIntType(n)
- elseif jobz == 'O' || jobz == 'o'
- ldvt = BlasIntType(min(m, n))
- elseif jobz == 'S' || jobz == 's'
- ldvt = BlasIntType(min(m, n))
- end
- VT = Matrix{Float32}(undef, ldvt, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1) # Query optimal integer workspace size
- call_external_function(handle, :sgesdd_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- jobz, BlasIntType(m), BlasIntType(n), A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- iwork_size = BlasIntType(round(Int, iwork[1])) # If iwork is also queried
- # If iwork is not queried, a heuristic is needed. For GESDD, it's typically 8*min(M,N) for 'A' or 'S', 5*min(M,N) for 'O'.
- if iwork_size == 1 && (jobz == 'A' || jobz == 'S' || jobz == 'a' || jobz == 's')
- iwork_size = BlasIntType(8 * min(m,n))
- elseif iwork_size == 1 && (jobz == 'O' || jobz == 'o')
- iwork_size = BlasIntType(5 * min(m,n))
- end
- work = Vector{Float32}(undef, lwork)
- iwork = Vector{BlasIntType}(undef, iwork_size)
- call_external_function(handle, :sgesdd_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- jobz, BlasIntType(m), BlasIntType(n), A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork, info)
- end
- return U, s, VT, info[]
- end
- function aocl_dgesdd!(jobz::Char, A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- s = Vector{Float64}(undef, min(m, n)) # Singular values
- ldu = BlasIntType(1)
- if jobz == 'A' || jobz == 'a'
- ldu = BlasIntType(m)
- elseif jobz == 'S' || jobz == 's' || jobz == 'O' || jobz == 'o'
- ldu = BlasIntType(m)
- end
- U = Matrix{Float64}(undef, ldu, (jobz == 'A' || jobz == 'a') ? m : (jobz == 'S' || jobz == 's' ? min(m, n) : 1))
- ldvt = BlasIntType(1)
- if jobz == 'A' || jobz == 'a'
- ldvt = BlasIntType(n)
- elseif jobz == 'O' || jobz == 'o'
- ldvt = BlasIntType(min(m, n))
- elseif jobz == 'S' || jobz == 's'
- ldvt = BlasIntType(min(m, n))
- end
- VT = Matrix{Float64}(undef, ldvt, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1) # Query optimal integer workspace size
- call_external_function(handle, :dgesdd_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- jobz, BlasIntType(m), BlasIntType(n), A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- iwork_size = BlasIntType(round(Int, iwork[1]))
- if iwork_size == 1 && (jobz == 'A' || jobz == 'S' || jobz == 'a' || jobz == 's')
- iwork_size = BlasIntType(8 * min(m,n))
- elseif iwork_size == 1 && (jobz == 'O' || jobz == 'o')
- iwork_size = BlasIntType(5 * min(m,n))
- end
- work = Vector{Float64}(undef, lwork)
- iwork = Vector{BlasIntType}(undef, iwork_size)
- call_external_function(handle, :dgesdd_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- jobz, BlasIntType(m), BlasIntType(n), A, lda, s, U, ldu, VT, ldvt, work, lwork, iwork, info)
- end
- return U, s, VT, info[]
- end
- # GEEVX: Compute eigenvalues and, optionally, eigenvectors of a general matrix
- # s/dgeevx (single/double precision)
- function aocl_sgeevx!(balanc::Char, jobvl::Char, jobvr::Char, sense::Char, A::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- wr = Vector{Float32}(undef, n)
- wi = Vector{Float32}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float32}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float32}(undef, ldvr, n)
- ilo = Ref{BlasIntType}(0)
- ihi = Ref{BlasIntType}(0)
- scale = Vector{Float32}(undef, n)
- abnrm = Ref{Float32}(0.0)
- rconde = Vector{Float32}(undef, n)
- rcondv = Vector{Float32}(undef, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1) # Query optimal integer workspace size
- call_external_function(handle, :sgeevx_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- balanc, jobvl, jobvr, sense, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr,
- ilo, ihi, scale, abnrm, rconde, rcondv, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- iwork_size = BlasIntType(round(Int, iwork[1]))
- work = Vector{Float32}(undef, lwork)
- iwork = Vector{BlasIntType}(undef, iwork_size)
- call_external_function(handle, :sgeevx_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{Float32},
- Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- balanc, jobvl, jobvr, sense, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr,
- ilo, ihi, scale, abnrm, rconde, rcondv, work, lwork, iwork, info)
- end
- return wr, wi, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv, info[]
- end
- function aocl_dgeevx!(balanc::Char, jobvl::Char, jobvr::Char, sense::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- wr = Vector{Float64}(undef, n)
- wi = Vector{Float64}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float64}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float64}(undef, ldvr, n)
- ilo = Ref{BlasIntType}(0)
- ihi = Ref{BlasIntType}(0)
- scale = Vector{Float64}(undef, n)
- abnrm = Ref{Float64}(0.0)
- rconde = Vector{Float64}(undef, n)
- rcondv = Vector{Float64}(undef, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- iwork = Vector{BlasIntType}(undef, 1) # Query optimal integer workspace size
- call_external_function(handle, :dgeevx_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- balanc, jobvl, jobvr, sense, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr,
- ilo, ihi, scale, abnrm, rconde, rcondv, work, lwork, iwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- iwork_size = BlasIntType(round(Int, iwork[1]))
- work = Vector{Float64}(undef, lwork)
- iwork = Vector{BlasIntType}(undef, iwork_size)
- call_external_function(handle, :dgeevx_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{Float64},
- Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ref{BlasIntType}, Ptr{BlasIntType}, Ref{BlasIntType}),
- balanc, jobvl, jobvr, sense, BlasIntType(n), A, lda, wr, wi, VL, ldvl, VR, ldvr,
- ilo, ihi, scale, abnrm, rconde, rcondv, work, lwork, iwork, info)
- end
- return wr, wi, VL, VR, ilo[], ihi[], scale, abnrm[], rconde, rcondv, info[]
- end
- # GETF2: LU factorization (unblocked algorithm)
- # s/dgetf2 (single/double precision)
- function aocl_sgetf2!(A::Matrix{Float32})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ipiv = Vector{Int32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgetf2_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Int32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, ipiv, info)
- return A, ipiv, info[]
- end
- function aocl_dgetf2!(A::Matrix{Float64})
- m, n = size(A)
- handle = m > typemax(Int32) || n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = m > typemax(Int32) || n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(m)
- ipiv = Vector{Int32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgetf2_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Int32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), A, lda, ipiv, info)
- return A, ipiv, info[]
- end
- # GBSV: Solve general band system of linear equations
- # s/dgbsv (single/double precision)
- function aocl_sgbsv!(A::Matrix{Float32}, B::Matrix{Float32}, kl::Int, ku::Int)
- n = size(A, 2) # N is the number of columns in A (matrix is N x N)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA = KL + KU + 1
- ldb = BlasIntType(n)
- ipiv = Vector{Int32}(undef, n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgbsv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Int32}, Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(kl), BlasIntType(ku), BlasIntType(nrhs),
- A, lda, ipiv, B, ldb, info)
- return A, B, ipiv, info[]
- end
- function aocl_dgbsv!(A::Matrix{Float64}, B::Matrix{Float64}, kl::Int, ku::Int)
- n = size(A, 2) # N is the number of columns in A (matrix is N x N)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA = KL + KU + 1
- ldb = BlasIntType(n)
- ipiv = Vector{Int32}(undef, n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgbsv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Int32}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(kl), BlasIntType(ku), BlasIntType(nrhs),
- A, lda, ipiv, B, ldb, info)
- return A, B, ipiv, info[]
- end
- # GTSV: Solve general tridiagonal system of linear equations
- # s/dgtsv (single/double precision)
- function aocl_sgtsv!(dl::Vector{Float32}, d::Vector{Float32}, du::Vector{Float32}, B::Matrix{Float32})
- n = length(d)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :sgtsv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(nrhs), dl, d, du, B, ldb, info)
- return dl, d, du, B, info[]
- end
- function aocl_dgtsv!(dl::Vector{Float64}, d::Vector{Float64}, du::Vector{Float64}, B::Matrix{Float64})
- n = length(d)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgtsv_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- BlasIntType(n), BlasIntType(nrhs), dl, d, du, B, ldb, info)
- return dl, d, du, B, info[]
- end
- # GGEV: Compute eigenvalues and, optionally, eigenvectors of a general matrix pencil (A,B)
- # s/dggev (single/double precision)
- function aocl_sggev!(jobvl::Char, jobvr::Char, A::Matrix{Float32}, B::Matrix{Float32})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- alphar = Vector{Float32}(undef, n)
- alphai = Vector{Float32}(undef, n)
- beta = Vector{Float32}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float32}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float32}(undef, ldvr, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float32}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :sggev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, B, ldb, alphar, alphai, beta,
- VL, ldvl, VR, ldvr, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float32}(undef, lwork)
- call_external_function(handle, :sggev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ptr{Float32}, Ptr{Float32},
- Ptr{Float32}, Ref{BlasIntType}, Ptr{Float32}, Ref{BlasIntType},
- Ptr{Float32}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, B, ldb, alphar, alphai, beta,
- VL, ldvl, VR, ldvr, work, lwork, info)
- end
- return alphar, alphai, beta, VL, VR, info[]
- end
- function aocl_dggev!(jobvl::Char, jobvr::Char, A::Matrix{Float64}, B::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- alphar = Vector{Float64}(undef, n)
- alphai = Vector{Float64}(undef, n)
- beta = Vector{Float64}(undef, n)
- ldvl = BlasIntType(1)
- if jobvl == 'V' || jobvl == 'v'
- ldvl = BlasIntType(n)
- end
- VL = Matrix{Float64}(undef, ldvl, n)
- ldvr = BlasIntType(1)
- if jobvr == 'V' || jobvr == 'v'
- ldvr = BlasIntType(n)
- end
- VR = Matrix{Float64}(undef, ldvr, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dggev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, B, ldb, alphar, alphai, beta,
- VL, ldvl, VR, ldvr, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dggev_, Cvoid,
- (Ref{UInt8}, Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- jobvl, jobvr, BlasIntType(n), A, lda, B, ldb, alphar, alphai, beta,
- VL, ldvl, VR, ldvr, work, lwork, info)
- end
- return alphar, alphai, beta, VL, VR, info[]
- end
- # POSV: Solves AX = B for symmetric positive definite A
- # dposv (double precision)
- function aocl_dposv!(uplo::Char, A::Matrix{Float64}, B::Matrix{Float64})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dposv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, B, ldb, info)
- return A, B, info[]
- end
- # SYTRF: Computes the Bunch-Kaufman factorization of a real symmetric indefinite matrix
- # dsytrf (double precision)
- function aocl_dsytrf!(uplo::Char, A::Matrix{Float64})
- n = size(A, 1)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ipiv = Vector{BlasIntType}(undef, n) # IPIV for SYTRF is BlasIntType
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dsytrf_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, ipiv, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dsytrf_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), A, lda, ipiv, work, lwork, info)
- end
- return A, ipiv, info[]
- end
- # SYTRS: Solves a system of linear equations using the Bunch-Kaufman factorization
- # dsytrs (double precision)
- function aocl_dsytrs!(uplo::Char, A::Matrix{Float64}, ipiv::Vector{BlasInt}, B::Matrix{Float64})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dsytrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, info)
- return B, info[]
- end
- # SYSV: Solves AX = B for symmetric indefinite A
- # dsysv (double precision)
- function aocl_dsysv!(uplo::Char, A::Matrix{Float64}, B::Matrix{Float64})
- n = size(A, 1)
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(n)
- ldb = BlasIntType(n)
- ipiv = Vector{BlasIntType}(undef, n)
- info = Ref{BlasIntType}(0)
- work = Vector{Float64}(undef, 1) # Query optimal workspace size
- lwork = BlasIntType(-1)
- call_external_function(handle, :dsysv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, work, lwork, info)
- if info[] == 0 && lwork[] == -1
- lwork = BlasIntType(round(Int, work[1]))
- work = Vector{Float64}(undef, lwork)
- call_external_function(handle, :dsysv_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType},
- Ptr{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- uplo, BlasIntType(n), BlasIntType(nrhs), A, lda, ipiv, B, ldb, work, lwork, info)
- end
- return A, B, ipiv, info[]
- end
- # GBTRF: Computes the LU factorization of a general band matrix
- # dgbtrf (double precision)
- function aocl_dgbtrf!(A::Matrix{Float64}, m::Int, n::Int, kl::Int, ku::Int)
- handle = max(m, n) > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = max(m, n) > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA for band matrix is KL + KU + 1
- ipiv = Vector{Int32}(undef, min(m, n))
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgbtrf_, Cvoid,
- (Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Int32}, Ref{BlasIntType}),
- BlasIntType(m), BlasIntType(n), BlasIntType(kl), BlasIntType(ku),
- A, lda, ipiv, info)
- return A, ipiv, info[]
- end
- # GBTRS: Solves a system of linear equations using the LU factorization of a general band matrix
- # dgbtrs (double precision)
- function aocl_dgbtrs!(trans::Char, A::Matrix{Float64}, ipiv::Vector{Int32}, B::Matrix{Float64}, kl::Int, ku::Int)
- n = size(A, 2) # A is N x N, stored in band format
- nrhs = size(B, 2)
- handle = n > typemax(Int32) ? flame_ilp64_handle : flame_lp64_handle
- BlasIntType = n > typemax(Int32) ? Int64 : Int32
- lda = BlasIntType(size(A, 1)) # LDA = KL + KU + 1
- ldb = BlasIntType(n)
- info = Ref{BlasIntType}(0)
- call_external_function(handle, :dgbtrs_, Cvoid,
- (Ref{UInt8}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType}, Ref{BlasIntType},
- Ptr{Float64}, Ref{BlasIntType}, Ptr{Int32}, Ptr{Float64}, Ref{BlasIntType}, Ref{BlasIntType}),
- trans, BlasIntType(n), BlasIntType(kl), BlasIntType(ku), BlasIntType(nrhs),
- A, lda, ipiv, B, ldb, info)
- return B, info[]
- end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement