Debug cuda conv (#2662)

* use t4
* use t4
2025-12-10 22:46:53 +08:00 · 2025-10-10 16:12:47 -07:00
parent 630350ad3e
commit 226a1d24e0
3 changed files with 15 additions and 13 deletions
--- a/mlx/backend/common/matmul.h
+++ b/mlx/backend/common/matmul.h
@@ -13,7 +13,7 @@ inline std::tuple<Shape, Strides, Strides> collapse_batches(
    const array& a,
    const array& b) {
  if (a.ndim() == 2) {
-    return {{1}, {0}, {0}};
+    return {Shape{1}, Strides{0}, Strides{0}};
  }

  Shape A_bshape{a.shape().begin(), a.shape().end() - 2};
@@ -38,7 +38,7 @@ inline std::tuple<Shape, Strides, Strides> collapse_batches(
 inline std::tuple<Shape, Strides, Strides, Strides>
 collapse_batches(const array& a, const array& b, const array& c) {
  if (a.ndim() == 2) {
-    return {{1}, {0}, {0}, {0}};
+    return {Shape{1}, Strides{0}, Strides{0}, Strides{0}};
  }

  Shape A_bshape{a.shape().begin(), a.shape().end() - 2};
--- a/mlx/backend/cuda/conv.cpp
+++ b/mlx/backend/cuda/conv.cpp
@@ -382,20 +382,19 @@ void Convolution::eval_gpu(const std::vector<array>& inputs, array& out_) {
  }

  if (op_graph) {
-    // Setup inputs and outputs.
-    register_args(encoder, backend_type, in, wt, out, out_);
-
    // Find a plan for the graph and execute it.
    auto plan = find_cudnn_plan_from_op_graph(
        encoder.device().cudnn_handle(), backend_type, dtype, *op_graph);
-    if (!plan) {
-      throw std::runtime_error("[conv] Unable to find an execution plan.");
-    }
-    auto [x, w, y] = dispatch_args(backend_type, in, wt, out);
-    if (encode_cudnn_plan(encoder, *plan, {'x', 'w', 'y'}, x, w, y)) {
-      conv_cache().emplace(
-          cache_key, std::make_pair(backend_type, std::move(*plan)));
-      return;
+    if (plan) {
+      // Setup inputs and outputs.
+      register_args(encoder, backend_type, in, wt, out, out_);
+
+      auto [x, w, y] = dispatch_args(backend_type, in, wt, out);
+      if (encode_cudnn_plan(encoder, *plan, {'x', 'w', 'y'}, x, w, y)) {
+        conv_cache().emplace(
+            cache_key, std::make_pair(backend_type, std::move(*plan)));
+        return;
+      }
    }
  }

--- a/mlx/backend/cuda/cudnn_utils.cpp
+++ b/mlx/backend/cuda/cudnn_utils.cpp
@@ -210,6 +210,9 @@ std::optional<cudnn_frontend::ExecutionPlan> find_cudnn_plan_from_op_graph(
    Dtype dtype,
    cudnn_frontend::OperationGraph& op_graph) {
  auto engine_configs = get_cudnn_engine_configs(backend_type, dtype, op_graph);
+  if (engine_configs.empty()) {
+    return std::nullopt;
+  }
  return find_cudnn_plan_from_engine_configs(handle, engine_configs, op_graph);
 }