Stop matrix copies with new attention kernel (#1639)

2025-12-16 01:49:05 +08:00 · 2024-12-02 14:12:38 -08:00
parent 1445dcaa60
commit 9d40e521d7
1 changed files with 2 additions and 4 deletions
--- a/mlx/backend/metal/scaled_dot_product_attention.cpp
+++ b/mlx/backend/metal/scaled_dot_product_attention.cpp
@@ -288,11 +288,9 @@ void ScaledDotProductAttention::eval_gpu(
        strides[0] == strides[1] * shape[1];
  };

-  // Checks that the last two dims are row contiguous.
+  // Checks only that the head dim (axis 3) has stride 1; rows may be strided.
  auto is_matrix_contiguous = [](const array& arr) {
-    auto& strides = arr.strides();
-    auto& shape = arr.shape();
-    return strides[3] == 1 && strides[2] == shape[3];
+    return arr.strides(3) == 1;
  };

  // We are in vector mode ie single query