Add CUDA sdpa vector (#2468)

2025-12-16 01:49:05 +08:00 · 2025-08-06 21:40:26 -07:00
parent f2adb5638d
commit a9bdd67baa
3 changed files with 782 additions and 12 deletions
--- a/mlx/backend/cuda/primitives.cpp
+++ b/mlx/backend/cuda/primitives.cpp
@@ -6,17 +6,6 @@

 namespace mlx::core {

-bool fast::ScaledDotProductAttention::use_fallback(
-    const array& q,
-    const array& k,
-    const array& v,
-    bool has_mask,
-    bool has_arr_mask,
-    bool do_causal,
-    Stream s) {
-  return true;
-}
-
 #define NO_GPU_MULTI(func)                                             \
  void func::eval_gpu(                                                 \
      const std::vector<array>& inputs, std::vector<array>& outputs) { \
@@ -53,7 +42,6 @@ NO_GPU_MULTI(Eig)
 NO_GPU_MULTI(Eigh)

 namespace fast {
-NO_GPU(ScaledDotProductAttention)
 NO_GPU_MULTI(CustomKernel)
 } // namespace fast