diff --git a/mlx/backend/cuda/scaled_dot_product_attention.cu b/mlx/backend/cuda/scaled_dot_product_attention.cu
index 9af6160d3..903b7c4b6 100644
--- a/mlx/backend/cuda/scaled_dot_product_attention.cu
+++ b/mlx/backend/cuda/scaled_dot_product_attention.cu
@@ -1068,8 +1068,7 @@ void ScaledDotProductAttention::eval_gpu(
         flags);
   }
 
-  // return sdpa_vector_fallback(s, encoder, q, k, v, scale_, o, do_causal_);
-  return sdpa_cudnn(s, encoder, q, k, v, scale_, o, do_causal_);
+  return sdpa_vector_fallback(s, encoder, q, k, v, scale_, o, do_causal_);
 }
 
 // Full attention mode