CPU LU factorization and linear solvers (#1451)

* linalg solve backend

* nits

* more nits + fix

* luf primitive and lu, solve, and solve_triangular backends

* changes / nits

---------

Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
Abe Leininger
2025-02-10 14:32:24 -06:00
committed by GitHub
parent 7df3f792a2
commit a5ededf1c3
12 changed files with 571 additions and 15 deletions

View File

@@ -14,13 +14,6 @@ namespace mx = mlx::core;
namespace nb = nanobind;
using namespace nb::literals;
namespace {
nb::tuple svd_helper(const mx::array& a, mx::StreamOrDevice s /* = {} */) {
const auto result = mx::linalg::svd(a, s);
return nb::make_tuple(result.at(0), result.at(1), result.at(2));
}
} // namespace
void init_linalg(nb::module_& parent_module) {
auto m = parent_module.def_submodule(
"linalg", "mlx.core.linalg: linear algebra routines.");
@@ -213,7 +206,10 @@ void init_linalg(nb::module_& parent_module) {
)pbdoc");
m.def(
"svd",
&svd_helper,
[](const mx::array& a, mx::StreamOrDevice s /* = {} */) {
const auto result = mx::linalg::svd(a, s);
return nb::make_tuple(result.at(0), result.at(1), result.at(2));
},
"a"_a,
nb::kw_only(),
"stream"_a = nb::none(),
@@ -262,7 +258,7 @@ void init_linalg(nb::module_& parent_module) {
"tri_inv",
&mx::linalg::tri_inv,
"a"_a,
"upper"_a,
"upper"_a = false,
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
@@ -276,7 +272,7 @@ void init_linalg(nb::module_& parent_module) {
Args:
a (array): Input array.
upper (array): Whether the array is upper or lower triangular. Defaults to ``False``.
upper (bool, optional): Whether the array is upper or lower triangular. Defaults to ``False``.
stream (Stream, optional): Stream or device. Defaults to ``None``
in which case the default stream of the default device is used.
@@ -441,7 +437,6 @@ void init_linalg(nb::module_& parent_module) {
m.def(
"eigh",
[](const mx::array& a, const std::string UPLO, mx::StreamOrDevice s) {
// TODO avoid cast?
auto result = mx::linalg::eigh(a, UPLO, s);
return nb::make_tuple(result.first, result.second);
},
@@ -484,4 +479,102 @@ void init_linalg(nb::module_& parent_module) {
array([[ 0.707107, -0.707107],
[ 0.707107, 0.707107]], dtype=float32)
)pbdoc");
m.def(
"lu",
[](const mx::array& a, mx::StreamOrDevice s /* = {} */) {
auto result = mx::linalg::lu(a, s);
return nb::make_tuple(result.at(0), result.at(1), result.at(2));
},
"a"_a,
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def lu(a: array, *, stream: Union[None, Stream, Device] = None) -> Tuple[array, array, array]"),
R"pbdoc(
Compute the LU factorization of the given matrix ``A``.
Note, unlike the default behavior of ``scipy.linalg.lu``, the pivots
are indices. To reconstruct the input use ``L[P, :] @ U`` for 2
dimensions or ``mx.take_along_axis(L, P[..., None], axis=-2) @ U``
for more than 2 dimensions.
To construct the full permutation matrix, do:
.. code-block::
P = mx.put_along_axis(mx.zeros_like(L), p[..., None], mx.array(1.0), axis=-1)
Args:
a (array): Input array.
stream (Stream, optional): Stream or device. Defaults to ``None``
in which case the default stream of the default device is used.
Returns:
tuple(array, array, array):
The ``P``, ``L``, and ``U`` arrays, such that ``A = L[P, :] @ U``
)pbdoc");
m.def(
"lu_factor",
&mx::linalg::lu_factor,
"a"_a,
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def lu_factor(a: array, *, stream: Union[None, Stream, Device] = None) -> Tuple[array, array]"),
R"pbdoc(
Computes a compact representation of the LU factorization.
Args:
a (array): Input array.
stream (Stream, optional): Stream or device. Defaults to ``None``
in which case the default stream of the default device is used.
Returns:
tuple(array, array): The ``LU`` matrix and ``pivots`` array.
)pbdoc");
m.def(
"solve",
&mx::linalg::solve,
"a"_a,
"b"_a,
nb::kw_only(),
"stream"_a = nb::none(),
nb::sig(
"def solve(a: array, b: array, *, stream: Union[None, Stream, Device] = None) -> array"),
R"pbdoc(
Compute the solution to a system of linear equations ``AX = B``.
Args:
a (array): Input array.
b (array): Input array.
stream (Stream, optional): Stream or device. Defaults to ``None``
in which case the default stream of the default device is used.
Returns:
array: The unique solution to the system ``AX = B``.
)pbdoc");
m.def(
"solve_triangular",
&mx::linalg::solve_triangular,
"a"_a,
"b"_a,
nb::kw_only(),
"upper"_a = false,
"stream"_a = nb::none(),
nb::sig(
"def solve_triangular(a: array, b: array, *, upper: bool = False, stream: Union[None, Stream, Device] = None) -> array"),
R"pbdoc(
Computes the solution of a triangular system of linear equations ``AX = B``.
Args:
a (array): Input array.
b (array): Input array.
upper (bool, optional): Whether the array is upper or lower
triangular. Default: ``False``.
stream (Stream, optional): Stream or device. Defaults to ``None``
in which case the default stream of the default device is used.
Returns:
array: The unique solution to the system ``AX = B``.
)pbdoc");
}

View File

@@ -330,6 +330,123 @@ class TestLinalg(mlx_tests.MLXTestCase):
mx.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
) # Non-square matrix
def test_lu(self):
    """LU factorization: invalid inputs raise, and P/L/U reconstruct the input."""
    # Scalars, 1-D arrays, and integer matrices are all rejected.
    for bad in (mx.array(0.0), mx.array([0.0, 1.0]), mx.array([[0, 1], [1, 0]])):
        with self.assertRaises(ValueError):
            mx.linalg.lu(bad, stream=mx.cpu)
    # Single 3x3 matrix: applying the pivots as row indices rebuilds the input.
    square = mx.array([[3.0, 1.0, 2.0], [1.0, 8.0, 6.0], [9.0, 2.0, 5.0]])
    P, L, U = mx.linalg.lu(square, stream=mx.cpu)
    self.assertTrue(mx.allclose(L[P, :] @ U, square))
    # Batched input: pivots are applied per-matrix via take_along_axis.
    batched = mx.broadcast_to(square, (5, 5, 3, 3))
    P, L, U = mx.linalg.lu(batched, stream=mx.cpu)
    permuted = mx.take_along_axis(L, P[..., None], axis=-2)
    self.assertTrue(mx.allclose(permuted @ U, batched))
def test_lu_factor(self):
    # Verifies the compact LU representation: ``LU`` packs the unit-lower
    # factor (below the diagonal) and ``U`` (on/above it); ``pivots`` lists
    # sequential row swaps, expanded here into an explicit permutation.
    mx.random.seed(7)
    # Test 5x5 matrix (random, seeded for determinism)
    a = mx.random.uniform(shape=(5, 5))
    LU, pivots = mx.linalg.lu_factor(a, stream=mx.cpu)
    n = a.shape[-1]
    pivots = pivots.tolist()
    # Apply each recorded swap in order to build the row permutation.
    perm = list(range(n))
    for i in range(len(pivots)):
        perm[i], perm[pivots[i]] = perm[pivots[i]], perm[i]
    # L carries an implicit unit diagonal, hence the added identity.
    L = mx.add(mx.tril(LU, k=-1), mx.eye(n))
    U = mx.triu(LU)
    self.assertTrue(mx.allclose(L @ U, a[perm, :]))
def test_solve(self):
    """mx.linalg.solve matches numpy.linalg.solve across shapes and sizes."""
    mx.random.seed(7)

    def check(a, b, **tol):
        # Solve on the CPU stream and compare against the NumPy reference.
        got = mx.linalg.solve(a, b, stream=mx.cpu)
        self.assertTrue(np.allclose(got, np.linalg.solve(a, b), **tol))

    # 3x3 system with a 1-D right-hand side.
    a = mx.array([[3.0, 1.0, 2.0], [1.0, 8.0, 6.0], [9.0, 2.0, 5.0]])
    check(a, mx.array([11.0, 35.0, 28.0]))
    # Symmetric positive-definite matrix (well conditioned by construction).
    N = 5
    a = mx.random.uniform(shape=(N, N))
    a = mx.matmul(a, a.T) + N * mx.eye(N)
    check(a, mx.random.uniform(shape=(N, 1)))
    # Batched systems.
    check(
        mx.random.uniform(shape=(5, 5, 4, 4)),
        mx.random.uniform(shape=(5, 5, 4, 1)),
        atol=1e-5,
    )
    # Large system — looser tolerance for accumulated rounding.
    N = 1000
    check(
        mx.random.uniform(shape=(N, N)),
        mx.random.uniform(shape=(N, 1)),
        atol=1e-3,
    )
    # Multi-column right-hand side.
    a = mx.random.uniform(shape=(5, 5))
    b = mx.random.uniform(shape=(5, 8))
    check(a, b)
    # Broadcast-batched multi-column right-hand side.
    check(
        mx.broadcast_to(a, (3, 2, 5, 5)),
        mx.broadcast_to(b, (3, 1, 5, 8)),
        rtol=1e-5,
        atol=1e-5,
    )
def test_solve_triangular(self):
    """solve_triangular agrees with a dense NumPy solve on triangular systems."""
    # Lower-triangular system.
    lower = mx.array([[4.0, 0.0, 0.0], [2.0, 3.0, 0.0], [1.0, -2.0, 5.0]])
    rhs = mx.array([8.0, 14.0, 3.0])
    out = mx.linalg.solve_triangular(lower, rhs, upper=False, stream=mx.cpu)
    self.assertTrue(np.allclose(out, np.linalg.solve(lower, rhs)))
    # Upper-triangular system.
    upper_mat = mx.array([[3.0, 2.0, 1.0], [0.0, 5.0, 4.0], [0.0, 0.0, 6.0]])
    rhs = mx.array([13.0, 33.0, 18.0])
    out = mx.linalg.solve_triangular(upper_mat, rhs, upper=True, stream=mx.cpu)
    self.assertTrue(np.allclose(out, np.linalg.solve(upper_mat, rhs)))
    # Broadcast batch with a multi-column right-hand side.
    batched_a = mx.broadcast_to(upper_mat, (3, 4, 3, 3))
    batched_b = mx.broadcast_to(mx.expand_dims(rhs, -1), (3, 4, 3, 8))
    out = mx.linalg.solve_triangular(batched_a, batched_b, upper=True, stream=mx.cpu)
    self.assertTrue(np.allclose(out, np.linalg.solve(batched_a, batched_b)))
# Allow running this test file directly: discover and run all TestCase methods.
if __name__ == "__main__":
    unittest.main()