Use LRU cache for cuda graph (#2448)

* Use LRU cache for cuda graph
* Remove unused destructor
2025-12-16 01:49:05 +08:00 · 2025-08-02 21:28:57 +09:00
parent 8831064493
commit aaf78f4c6b
5 changed files with 63 additions and 25 deletions
--- a/mlx/backend/cuda/utils.cpp
+++ b/mlx/backend/cuda/utils.cpp
@@ -17,6 +17,27 @@ CudaStream::~CudaStream() {
  CHECK_CUDA_ERROR(cudaStreamDestroy(stream_));
 }

+CudaGraphExec::CudaGraphExec(cudaGraphExec_t handle) : handle_(handle) {}
+
+// Move constructor: takes over the handle held by `other`.
+CudaGraphExec::CudaGraphExec(CudaGraphExec&& other) : handle_(other.handle_) {
+  // Null out the source so that its destructor's reset() becomes a no-op and
+  // the handle is destroyed exactly once, by this object.
+  other.handle_ = nullptr;
+}
+
+// Destroys the held executable graph, if any, by delegating to reset().
+CudaGraphExec::~CudaGraphExec() {
+  this->reset();
+}
+
+// Instantiates `graph` into an executable graph and stores the resulting
+// handle in this object. Errors from the CUDA runtime are reported through
+// CHECK_CUDA_ERROR.
+void CudaGraphExec::instantiate(cudaGraph_t graph) {
+  // Release any executable graph already held; otherwise
+  // cudaGraphInstantiate would overwrite handle_ and the previous handle
+  // would never be destroyed.
+  reset();
+  CHECK_CUDA_ERROR(cudaGraphInstantiate(&handle_, graph, nullptr, nullptr, 0));
+}
+
+// Destroys the held executable graph and returns this object to the empty
+// (null-handle) state. Does nothing when no handle is held.
+void CudaGraphExec::reset() {
+  if (handle_ == nullptr) {
+    return;
+  }
+  CHECK_CUDA_ERROR(cudaGraphExecDestroy(handle_));
+  handle_ = nullptr;
+}
+
 void check_cublas_error(const char* name, cublasStatus_t err) {
  if (err != CUBLAS_STATUS_SUCCESS) {
    // TODO: Use cublasGetStatusString when it is widely available.