Add base cudnn attention support

2025-10-30 23:38:09 +08:00 · 2025-07-25 12:30:22 -07:00
parent db5c7efcf6
commit d8ed6c1aa3
2 changed files with 1 addition and 14 deletions
--- a/mlx/backend/cuda/CMakeLists.txt
+++ b/mlx/backend/cuda/CMakeLists.txt
@@ -39,6 +39,7 @@ target_sources(
          ${CMAKE_CURRENT_SOURCE_DIR}/reduce/row_reduce.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/rms_norm.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/rope.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/scaled_dot_product_attention.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/scan.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/slicing.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/softmax.cu
--- a/mlx/backend/cuda/primitives.cpp
+++ b/mlx/backend/cuda/primitives.cpp
@@ -4,19 +4,6 @@
 #include "mlx/fast_primitives.h"
 #include "mlx/primitives.h"
 namespace mlx::core {
 // use_fallback for fast::ScaledDotProductAttention as defined in this file.
 // The body reads none of its parameters (q, k, v, has_mask, has_arr_mask,
 // do_causal, s) and unconditionally returns true.
 // NOTE(review): presumably a true result tells the caller to take the
 // generic fallback path instead of a dedicated kernel; the call site is not
 // visible here, so confirm before relying on that.
 bool fast::ScaledDotProductAttention::use_fallback(
    const array& q,
    const array& k,
    const array& v,
    bool has_mask,
    bool has_arr_mask,
    bool do_causal,
    Stream s) {
  // Constant answer: no inspection of shapes, masks, or stream.
  return true;
 }
 #define NO_GPU_MULTI(func)                                             \
  void func::eval_gpu(                                                 \
      const std::vector<array>& inputs, std::vector<array>& outputs) { \
@@ -53,7 +40,6 @@ NO_GPU_MULTI(Eig)
 NO_GPU_MULTI(Eigh)
 // Macro invocations for primitives that live in the nested `fast` namespace.
 // NO_GPU_MULTI(func) expands to a definition of
 // func::eval_gpu(const std::vector<array>& inputs, std::vector<array>& outputs);
 // the body of that macro and the definition of NO_GPU are not visible in this
 // excerpt.
 // NOTE(review): presumably both macros stub out eval_gpu for primitives
 // without a GPU implementation; confirm against the macro definitions.
 namespace fast {
 NO_GPU(ScaledDotProductAttention)
 NO_GPU_MULTI(CustomKernel)
 } // namespace fast