promote mask when needed (#1998)

2025-12-12 23:39:04 +08:00 · 2025-03-23 19:58:28 -07:00
parent f018e248cd
commit a84cc0123f
2 changed files with 18 additions and 0 deletions
--- a/mlx/fast.cpp
+++ b/mlx/fast.cpp
@@ -750,6 +750,8 @@ array scaled_dot_product_attention(
      msg << "[scaled_dot_product_attention] Mask type must promote to output type. "
          << final_type << ".";
      throw std::invalid_argument(msg.str());
    } else if (!has_bool_mask) {
      mask_arr = astype(mask_arr, final_type, stream);
    }
    // Broadcast mask
    auto mask_shape = queries.shape();
--- a/python/tests/test_fast_sdpa.py
+++ b/python/tests/test_fast_sdpa.py
@@ -543,6 +543,22 @@ class TestSDPA(mlx_tests.MLXTestCase):
        out = mx.fast.scaled_dot_product_attention(q, k, v, scale=scale, mask=mask)
        self.assertTrue(mx.allclose(ref, out, atol=1e-4, rtol=1e-4))
    def test_sdpa_prommote_mask(self):
        """Compare fused SDPA with the reference when the mask is a bfloat16 scalar.

        Both ``mx.fast.scaled_dot_product_attention`` and
        ``mlx_primitives_sdpa`` receive the same randomly generated q/k/v and
        the same scalar mask; their outputs must agree within 1e-4 tolerances.
        """
        # NOTE(review): judging by the test name, the point is that the mask
        # dtype differs from the q/k/v dtype and has to be promoted -- confirm.
        scalar_mask = mx.array(2.0, mx.bfloat16)

        dim, length = 64, 256
        n_q, n_kv = 4, 1
        scale = 1.0
        q_shape = (1, n_q, length, dim)
        kv_shape = (1, n_kv, length, dim)

        # Seed before drawing so the three inputs are reproducible; the draw
        # order (queries, keys, values) is kept fixed.
        mx.random.seed(0)
        queries = 5e-1 * mx.random.normal(shape=q_shape)
        keys = 5e-1 * mx.random.normal(shape=kv_shape)
        values = 5e-1 * mx.random.normal(shape=kv_shape)

        expected = mlx_primitives_sdpa(queries, keys, values, scale, mask=scalar_mask)
        actual = mx.fast.scaled_dot_product_attention(
            queries, keys, values, scale=scale, mask=scalar_mask
        )
        self.assertTrue(mx.allclose(expected, actual, atol=1e-4, rtol=1e-4))
 # Run the test suite when this file is executed directly; failfast=True
 # stops the run at the first failing or erroring test.
 if __name__ == "__main__":
    unittest.main(failfast=True)