[CUDA] Initial implementation of Convolution with cuDNN (#2385)

* Link with cuDNN * Initial implementation * Remove backend apis * Fix recording cudnn conv * More unused backend apis * Fix C++ conv tests * include cudnn as python dep * Install libcudnn9-dev-cuda-12 in CI * cudnn only accepts contiguous inputs * Switch to backend apis * Plan needs to be kept alive * Turn off tf32 * Add cache * Test the native cuda graph api * Set cudnn stream before execution * Make LRUCache more like a normal container * Do error check for cublas handle * Zero-initilizing array * Use tf32 for conv * Skip TestConv.test_torch_conv_2D test --------- Co-authored-by: Awni Hannun <awni@apple.com>
2025-12-16 01:49:05 +08:00 · 2025-07-25 08:12:10 +09:00
parent 70dc336785
commit 6f5874a2f2
13 changed files with 590 additions and 50 deletions
--- a/mlx/backend/cuda/utils.cpp
+++ b/mlx/backend/cuda/utils.cpp
@@ -17,6 +17,14 @@ CudaStream::~CudaStream() {
  CHECK_CUDA_ERROR(cudaStreamDestroy(stream_));
 }

+void check_cublas_error(const char* name, cublasStatus_t err) {
+  if (err != CUBLAS_STATUS_SUCCESS) {
+    // TODO: Use cublasGetStatusString when it is widely available.
+    throw std::runtime_error(
+        fmt::format("{} failed with code: {}.", name, static_cast<int>(err)));
+  }
+}
+
 void check_cuda_error(const char* name, cudaError_t err) {
  if (err != cudaSuccess) {
    throw std::runtime_error(