[CUDA] Initial implementation of Convolution with cuDNN (#2385)

* Link with cuDNN * Initial implementation * Remove backend apis * Fix recording cudnn conv * More unused backend apis * Fix C++ conv tests * include cudnn as python dep * Install libcudnn9-dev-cuda-12 in CI * cudnn only accepts contiguous inputs * Switch to backend apis * Plan needs to be kept alive * Turn off tf32 * Add cache * Test the native cuda graph api * Set cudnn stream before execution * Make LRUCache more like a normal container * Do error check for cublas handle * Zero-initializing array * Use tf32 for conv * Skip TestConv.test_torch_conv_2D test --------- Co-authored-by: Awni Hannun <awni@apple.com>
2025-12-16 01:49:05 +08:00 · 2025-07-25 08:12:10 +09:00
parent 70dc336785
commit 6f5874a2f2
13 changed files with 590 additions and 50 deletions
--- a/mlx/backend/cuda/CMakeLists.txt
+++ b/mlx/backend/cuda/CMakeLists.txt
@@ -15,6 +15,7 @@ target_sources(
          ${CMAKE_CURRENT_SOURCE_DIR}/copy/copy_general.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/copy/copy_general_dynamic.cu
          ${CMAKE_CURRENT_SOURCE_DIR}/copy/copy_general_input.cu
+          ${CMAKE_CURRENT_SOURCE_DIR}/conv.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/cuda.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
          ${CMAKE_CURRENT_SOURCE_DIR}/eval.cpp
@@ -131,6 +132,23 @@ target_link_libraries(mlx PRIVATE CUDA::cublasLt)
 # Use NVRTC and driver APIs.
 target_link_libraries(mlx PRIVATE CUDA::nvrtc CUDA::cuda_driver)

+# Use the frontend APIs of cuDNN: fetch cudnn-frontend at tag v1.12.1 and link it into mlx.
+FetchContent_Declare(
+  cudnn
+  GIT_REPOSITORY https://github.com/NVIDIA/cudnn-frontend.git
+  GIT_TAG v1.12.1
+  GIT_SHALLOW TRUE
+  EXCLUDE_FROM_ALL)
+set(CUDNN_FRONTEND_SKIP_JSON_LIB ON) # presumably cudnn-frontend build switches (this and the three below) - confirm
+set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
+set(CUDNN_FRONTEND_BUILD_TESTS OFF)
+set(CUDNN_FRONTEND_BUILD_PYTHON_BINDINGS OFF)
+FetchContent_MakeAvailable(cudnn) # the variables set above are in scope when the fetched project is configured here
+target_link_libraries(mlx PRIVATE cudnn_frontend)
+# Link with the actual cuDNN libraries; the included cuDNN.cmake presumably defines CUDNN::cudnn_all - confirm.
+include(${cudnn_frontend_SOURCE_DIR}/cmake/cuDNN.cmake) # NOTE(review): declared name is "cudnn"; this variable presumably comes from the fetched project's project() call - confirm
+target_link_libraries(mlx PRIVATE CUDNN::cudnn_all)
+
 # Suppress nvcc warnings on MLX headers.
 target_compile_options(mlx PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe
                                   --diag_suppress=997>)