Fix 2-pass SDPA: initialize from sinks only in block 0

2025-12-16 01:49:05 +08:00 · 2025-09-09 12:12:09 -07:00
parent 3ca3ab9dcd
commit 0fe25eb588
2 changed files with 13 additions and 12 deletions
--- a/mlx/backend/metal/kernels/sdpa_vector.h
+++ b/mlx/backend/metal/kernels/sdpa_vector.h
@@ -263,7 +263,7 @@ template <typename T, int D, int V = D>
  U max_score = -INFINITY;
  U sum_exp_score = 0;
-  if (has_sinks && simd_gid == 0) {
+  if (has_sinks && block_idx == 0 && simd_gid == 0) {
    int q_head_idx = q_batch_head_idx % num_q_heads;
    max_score = static_cast<U>(sinks[q_head_idx]);
    sum_exp_score = 1;
--- a/python/tests/test_fast_sdpa.py
+++ b/python/tests/test_fast_sdpa.py
@@ -730,7 +730,8 @@ class TestSDPA(mlx_tests.MLXTestCase):
        with self.assertRaises(ValueError):
            mx.fast.scaled_dot_product_attention(q, k, v, scale=scale, sinks=sinks)
-        for T_q in [1, 128]:
+        for T_kv in [128, 4096]:
            for T_q in [1]:  # , 128]:
                for N_kv in [2, 8]:
                    q = mx.random.normal(shape=(B, N_q, T_q, D))
                    k = mx.random.normal(shape=(B, N_kv, T_kv, D))