non square qr (#1783)

2025-09-18 10:26:56 +08:00 · 2025-01-21 14:07:47 -08:00
parent 1f4c127fb9
commit e6a7ab9675
3 changed files with 37 additions and 15 deletions
--- a/mlx/backend/common/qrf.cpp
+++ b/mlx/backend/common/qrf.cpp
@@ -41,7 +41,7 @@ template <typename T>
 void qrf_impl(const array& a, array& q, array& r) {
  const int M = a.shape(-2);
  const int N = a.shape(-1);
-  const int lda = std::max(M, N);
+  const int lda = M;
  size_t num_matrices = a.size() / (M * N);
  int num_reflectors = std::min(M, N);
  auto tau =
@@ -89,13 +89,16 @@ void qrf_impl(const array& a, array& q, array& r) {
  allocator::free(work);

  r.set_data(allocator::malloc_or_wait(r.nbytes()));
-  copy_inplace(in, r, CopyType::General);

  for (int i = 0; i < num_matrices; ++i) {
-    // Zero lower triangle
+    // R_i is num_reflectors x N, row-major; in is column-major (ld = M)
    for (int j = 0; j < r.shape(-2); ++j) {
      for (int k = 0; k < j; ++k) {
-        r.data<T>()[i * N * M + j * N + k] = 0;
+        r.data<T>()[i * N * num_reflectors + j * N + k] = 0;
+      }
+      for (int k = j; k < r.shape(-1); ++k) {
+        r.data<T>()[i * N * num_reflectors + j * N + k] =
+            in.data<T>()[i * N * M + j + k * M];
      }
    }
  }
@@ -104,7 +107,7 @@ void qrf_impl(const array& a, array& q, array& r) {
  lwork = -1;
  lpack<T>::xorgqr(
      &M,
-      &N,
+      &num_reflectors,
      &num_reflectors,
      nullptr,
      &lda,
@@ -120,7 +123,7 @@ void qrf_impl(const array& a, array& q, array& r) {
    // Compute Q
    lpack<T>::xorgqr(
        &M,
-        &N,
+        &num_reflectors,
        &num_reflectors,
        in.data<float>() + M * N * i,
        &lda,
@@ -131,7 +134,15 @@ void qrf_impl(const array& a, array& q, array& r) {
  }

  q.set_data(allocator::malloc_or_wait(q.nbytes()));
-  copy_inplace(in, q, CopyType::General);
+  for (int i = 0; i < num_matrices; ++i) {
+    // Q_i is M x num_reflectors, row-major; in is column-major (ld = M)
+    for (int j = 0; j < q.shape(-2); ++j) {
+      for (int k = 0; k < q.shape(-1); ++k) {
+        q.data<T>()[i * M * num_reflectors + j * num_reflectors + k] =
+            in.data<T>()[i * N * M + j + k * M];
+      }
+    }
+  }

  // Cleanup
  allocator::free(work);
--- a/mlx/linalg.cpp
+++ b/mlx/linalg.cpp
@@ -187,13 +187,13 @@ std::pair<array, array> qr(const array& a, StreamOrDevice s /* = {} */) {
        << a.ndim() << " dimensions.";
    throw std::invalid_argument(msg.str());
  }
-  if (a.shape(-1) != a.shape(-2)) {
-    throw std::invalid_argument(
-        "[linalg::qr] Support for non-square matrices NYI.");
-  }
-
+  int k = std::min(a.shape(-2), a.shape(-1));
+  auto q_shape = a.shape();
+  q_shape.back() = k;
+  auto r_shape = a.shape();
+  r_shape[r_shape.size() - 2] = k;
  auto out = array::make_arrays(
-      {a.shape(), a.shape()},
+      {std::move(q_shape), std::move(r_shape)},
      {a.dtype(), a.dtype()},
      std::make_shared<QRF>(to_stream(s)),
      {astype(a, a.dtype(), s)});