Test the native cuda graph api

2025-12-16 01:49:05 +08:00 · 2025-07-20 03:40:15 -07:00
parent 85510dae78
commit 67a5f7b2a8
1 changed files with 18 additions and 0 deletions
--- a/mlx/backend/cuda/conv.cpp
+++ b/mlx/backend/cuda/conv.cpp
@@ -22,6 +22,9 @@ namespace mlx::core {

 namespace {

+// Not all engines support it so can not use this API now.
+#define MLX_USE_CUDNN_NATIVE_CUDA_GRAPH_API 0
+
 struct ConvCacheKey {
  int device_id;
  cudnnBackendDescriptorType_t backend_type;
@@ -181,6 +184,20 @@ bool execute_plan(
                         .setUids(3, uids)
                         .build();

+#if CUDNN_VERSION >= 90500 && MLX_USE_CUDNN_NATIVE_CUDA_GRAPH_API
+  cudaGraph_t graph;
+  cudaGraphCreate(&graph, 0);
+  std::unique_ptr<cudaGraph_t, void (*)(cudaGraph_t*)> graph_freer(
+      &graph, [](cudaGraph_t* p) { cudaGraphDestroy(*p); });
+  if (cudnnBackendPopulateCudaGraph(
+          encoder.device().cudnn_handle(),
+          plan.get_raw_desc(),
+          variantPack.get_raw_desc(),
+          graph) != CUDNN_STATUS_SUCCESS) {
+    return false;
+  }
+  encoder.add_graph_node(graph);
+#else
  auto capture = encoder.capture_context();
  if (cudnnBackendExecute(
          encoder.device().cudnn_handle(),
@@ -190,6 +207,7 @@ bool execute_plan(
    capture.discard = true;
    return false;
  }
+#endif

  encoder.add_temporary(workspace);
  return true;