mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-19 10:48:09 +08:00
CPU mx.linalg.cholesky_inverse and mx.linalg.tri_inv (#1307)
* add cholesky inv + tri inv * always run tri_inv on cpu * consistent naming
This commit is contained in:
@@ -10,9 +10,106 @@
|
||||
#include <lapack.h>
|
||||
#endif
|
||||
|
||||
// Wrapper to account for differences in
|
||||
// LAPACK implementations (basically how to pass the 'uplo' string to fortran).
|
||||
int strtri_wrapper(char uplo, char diag, float* matrix, int N) {
|
||||
int info;
|
||||
|
||||
#ifdef LAPACK_FORTRAN_STRLEN_END
|
||||
strtri_(
|
||||
/* uplo = */ &uplo,
|
||||
/* diag = */ &diag,
|
||||
/* N = */ &N,
|
||||
/* a = */ matrix,
|
||||
/* lda = */ &N,
|
||||
/* info = */ &info,
|
||||
/* uplo_len = */ static_cast<size_t>(1),
|
||||
/* diag_len = */ static_cast<size_t>(1));
|
||||
#else
|
||||
strtri_(
|
||||
/* uplo = */ &uplo,
|
||||
/* diag = */ &diag,
|
||||
/* N = */ &N,
|
||||
/* a = */ matrix,
|
||||
/* lda = */ &N,
|
||||
/* info = */ &info);
|
||||
#endif
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
namespace mlx::core {
|
||||
|
||||
void inverse_impl(const array& a, array& inv) {
|
||||
void general_inv(array& inv, int N, int i) {
|
||||
int info;
|
||||
auto ipiv = array::Data{allocator::malloc_or_wait(sizeof(int) * N)};
|
||||
// Compute LU factorization.
|
||||
sgetrf_(
|
||||
/* m = */ &N,
|
||||
/* n = */ &N,
|
||||
/* a = */ inv.data<float>() + N * N * i,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ static_cast<int*>(ipiv.buffer.raw_ptr()),
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: LU factorization failed with error code " << info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
static const int lwork_query = -1;
|
||||
float workspace_size = 0;
|
||||
|
||||
// Compute workspace size.
|
||||
sgetri_(
|
||||
/* m = */ &N,
|
||||
/* a = */ nullptr,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ nullptr,
|
||||
/* work = */ &workspace_size,
|
||||
/* lwork = */ &lwork_query,
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: LU workspace calculation failed with error code "
|
||||
<< info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
const int lwork = workspace_size;
|
||||
auto scratch = array::Data{allocator::malloc_or_wait(sizeof(float) * lwork)};
|
||||
|
||||
// Compute inverse.
|
||||
sgetri_(
|
||||
/* m = */ &N,
|
||||
/* a = */ inv.data<float>() + N * N * i,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ static_cast<int*>(ipiv.buffer.raw_ptr()),
|
||||
/* work = */ static_cast<float*>(scratch.buffer.raw_ptr()),
|
||||
/* lwork = */ &lwork,
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: inversion failed with error code " << info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
void tri_inv(array& inv, int N, int i, bool upper) {
|
||||
const char uplo = upper ? 'L' : 'U';
|
||||
const char diag = 'N';
|
||||
int info = strtri_wrapper(uplo, diag, inv.data<float>() + N * N * i, N);
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: triangular inversion failed with error code " << info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
void inverse_impl(const array& a, array& inv, bool tri, bool upper) {
|
||||
// Lapack uses the column-major convention. We take advantage of the following
|
||||
// identity to avoid transposing (see
|
||||
// https://math.stackexchange.com/a/340234):
|
||||
@@ -24,63 +121,11 @@ void inverse_impl(const array& a, array& inv) {
|
||||
const int N = a.shape(-1);
|
||||
const size_t num_matrices = a.size() / (N * N);
|
||||
|
||||
int info;
|
||||
auto ipiv = array::Data{allocator::malloc_or_wait(sizeof(int) * N)};
|
||||
|
||||
for (int i = 0; i < num_matrices; i++) {
|
||||
// Compute LU factorization.
|
||||
sgetrf_(
|
||||
/* m = */ &N,
|
||||
/* n = */ &N,
|
||||
/* a = */ inv.data<float>() + N * N * i,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ static_cast<int*>(ipiv.buffer.raw_ptr()),
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: LU factorization failed with error code " << info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
static const int lwork_query = -1;
|
||||
float workspace_size = 0;
|
||||
|
||||
// Compute workspace size.
|
||||
sgetri_(
|
||||
/* m = */ &N,
|
||||
/* a = */ nullptr,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ nullptr,
|
||||
/* work = */ &workspace_size,
|
||||
/* lwork = */ &lwork_query,
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: LU workspace calculation failed with error code "
|
||||
<< info;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
const int lwork = workspace_size;
|
||||
auto scratch =
|
||||
array::Data{allocator::malloc_or_wait(sizeof(float) * lwork)};
|
||||
|
||||
// Compute inverse.
|
||||
sgetri_(
|
||||
/* m = */ &N,
|
||||
/* a = */ inv.data<float>() + N * N * i,
|
||||
/* lda = */ &N,
|
||||
/* ipiv = */ static_cast<int*>(ipiv.buffer.raw_ptr()),
|
||||
/* work = */ static_cast<float*>(scratch.buffer.raw_ptr()),
|
||||
/* lwork = */ &lwork,
|
||||
/* info = */ &info);
|
||||
|
||||
if (info != 0) {
|
||||
std::stringstream ss;
|
||||
ss << "inverse_impl: inversion failed with error code " << info;
|
||||
throw std::runtime_error(ss.str());
|
||||
if (tri) {
|
||||
tri_inv(inv, N, i, upper);
|
||||
} else {
|
||||
general_inv(inv, N, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,7 +134,7 @@ void Inverse::eval(const std::vector<array>& inputs, array& output) {
|
||||
if (inputs[0].dtype() != float32) {
|
||||
throw std::runtime_error("[Inverse::eval] only supports float32.");
|
||||
}
|
||||
inverse_impl(inputs[0], output);
|
||||
inverse_impl(inputs[0], output, tri_, upper_);
|
||||
}
|
||||
|
||||
} // namespace mlx::core
|
||||
|
@@ -238,7 +238,7 @@ std::vector<array> svd(const array& a, StreamOrDevice s /* = {} */) {
|
||||
{a});
|
||||
}
|
||||
|
||||
array inv(const array& a, StreamOrDevice s /* = {} */) {
|
||||
array inv_impl(const array& a, bool tri, bool upper, StreamOrDevice s) {
|
||||
if (a.dtype() != float32) {
|
||||
std::ostringstream msg;
|
||||
msg << "[linalg::inv] Arrays must type float32. Received array "
|
||||
@@ -258,7 +258,21 @@ array inv(const array& a, StreamOrDevice s /* = {} */) {
|
||||
}
|
||||
|
||||
return array(
|
||||
a.shape(), a.dtype(), std::make_shared<Inverse>(to_stream(s)), {a});
|
||||
a.shape(),
|
||||
a.dtype(),
|
||||
std::make_shared<Inverse>(to_stream(s), tri, upper),
|
||||
{a});
|
||||
}
|
||||
|
||||
array inv(const array& a, StreamOrDevice s /* = {} */) {
|
||||
return inv_impl(a, /*tri=*/false, /*upper=*/true, s);
|
||||
}
|
||||
|
||||
array tri_inv(
|
||||
const array& a,
|
||||
bool upper /* = true */,
|
||||
StreamOrDevice s /* = {} */) {
|
||||
return inv_impl(a, /*tri=*/true, upper, s);
|
||||
}
|
||||
|
||||
array cholesky(
|
||||
@@ -292,4 +306,37 @@ array cholesky(
|
||||
{a});
|
||||
}
|
||||
|
||||
array cholesky_inv(
|
||||
const array& L,
|
||||
bool upper /* = false */,
|
||||
StreamOrDevice s /* = {} */) {
|
||||
if (L.dtype() != float32) {
|
||||
std::ostringstream msg;
|
||||
msg << "[linalg::cholesky] Arrays must type float32. Received array "
|
||||
<< "with type " << L.dtype() << ".";
|
||||
throw std::invalid_argument(msg.str());
|
||||
}
|
||||
|
||||
if (L.ndim() < 2) {
|
||||
std::ostringstream msg;
|
||||
msg << "[linalg::cholesky] Arrays must have >= 2 dimensions. Received array "
|
||||
"with "
|
||||
<< L.ndim() << " dimensions.";
|
||||
throw std::invalid_argument(msg.str());
|
||||
}
|
||||
|
||||
if (L.shape(-1) != L.shape(-2)) {
|
||||
throw std::invalid_argument(
|
||||
"[linalg::cholesky] Cholesky inverse is only defined for square "
|
||||
"matrices.");
|
||||
}
|
||||
|
||||
array L_inv = tri_inv(L, upper, s);
|
||||
if (upper) {
|
||||
return matmul(L_inv, swapaxes(L_inv, -1, -2, s), s);
|
||||
} else {
|
||||
return matmul(swapaxes(L_inv, -1, -2, s), L_inv, s);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mlx::core::linalg
|
||||
|
@@ -66,6 +66,10 @@ std::vector<array> svd(const array& a, StreamOrDevice s = {});
|
||||
|
||||
array inv(const array& a, StreamOrDevice s = {});
|
||||
|
||||
array tri_inv(const array& a, bool upper = false, StreamOrDevice s = {});
|
||||
|
||||
array cholesky(const array& a, bool upper = false, StreamOrDevice s = {});
|
||||
|
||||
array cholesky_inv(const array& a, bool upper = false, StreamOrDevice s = {});
|
||||
|
||||
} // namespace mlx::core::linalg
|
||||
|
@@ -2127,7 +2127,8 @@ class SVD : public Primitive {
|
||||
/* Matrix inversion primitive. */
|
||||
class Inverse : public UnaryPrimitive {
|
||||
public:
|
||||
explicit Inverse(Stream stream) : UnaryPrimitive(stream) {}
|
||||
explicit Inverse(Stream stream, bool tri, bool upper)
|
||||
: UnaryPrimitive(stream), tri_(tri), upper_(upper) {}
|
||||
|
||||
void eval_cpu(const std::vector<array>& inputs, array& output) override;
|
||||
void eval_gpu(const std::vector<array>& inputs, array& output) override;
|
||||
@@ -2137,6 +2138,8 @@ class Inverse : public UnaryPrimitive {
|
||||
|
||||
private:
|
||||
void eval(const std::vector<array>& inputs, array& output);
|
||||
bool tri_;
|
||||
bool upper_;
|
||||
};
|
||||
|
||||
class Cholesky : public UnaryPrimitive {
|
||||
|
Reference in New Issue
Block a user