[CUDA] Initial implementation of Convolution with cuDNN (#2385)

* Link with cuDNN
* Initial implementation
* Remove backend apis
* Fix recording cudnn conv
* More unused backend apis
* Fix C++ conv tests
* include cudnn as python dep
* Install libcudnn9-dev-cuda-12 in CI
* cudnn only accepts contiguous inputs
* Switch to backend apis
* Plan needs to be kept alive
* Turn off tf32
* Add cache
* Test the native cuda graph api
* Set cudnn stream before execution
* Make LRUCache more like a normal container
* Do error check for cublas handle
* Zero-initializing array
* Use tf32 for conv
* Skip TestConv.test_torch_conv_2D test

---------

Co-authored-by: Awni Hannun <awni@apple.com>
2025-12-07 19:59:01 +08:00 · 2025-07-25 08:12:10 +09:00
parent 70dc336785
commit 6f5874a2f2
13 changed files with 590 additions and 50 deletions
--- a/python/tests/cuda_skip.py
+++ b/python/tests/cuda_skip.py
@@ -15,19 +15,12 @@ cuda_skip = {
    "TestOps.test_hadamard_grad_vmap",
    # Convolutions NYI
    "TestConv.test_1d_conv_with_2d",
-    "TestConv.test_asymmetric_padding",
-    "TestConv.test_basic_grad_shapes",
-    "TestConv.test_conv2d_unaligned_channels",
    "TestConv.test_conv_1d_groups_flipped",
    "TestConv.test_conv_general_flip_grad",
    "TestConv.test_conv_groups_grad",
-    "TestConv.test_numpy_conv",
-    "TestConv.test_repeated_conv",
-    "TestConv.test_torch_conv_1D",
    "TestConv.test_torch_conv_1D_grad",
    "TestConv.test_torch_conv_2D",
    "TestConv.test_torch_conv_2D_grad",
-    "TestConv.test_torch_conv_3D",
    "TestConv.test_torch_conv_3D_grad",
    "TestConv.test_torch_conv_depthwise",
    "TestConv.test_torch_conv_general",
@@ -40,10 +33,6 @@ cuda_skip = {
    "TestConvTranspose.test_torch_conv_transpose_3D",
    "TestConvTranspose.test_torch_conv_transpose_3D_grad",
    "TestConvTranspose.test_torch_conv_transpose_3d_output_padding",
-    "TestExportImport.test_export_conv",
-    "TestLayers.test_conv1d",
-    "TestLayers.test_conv2d",
-    "TestVmap.test_vmap_conv",
    # FFTs NYI
    "TestFFT.test_fft",
    "TestFFT.test_fft_big_powers_of_two",