Custom cuda kernel (#2517)

2025-12-16 01:49:05 +08:00 · 2025-08-20 17:20:22 -07:00
parent f4c8888cbe
commit e397177f6e
19 changed files with 1042 additions and 211 deletions
--- a/mlx/backend/cuda/compiled.cpp
+++ b/mlx/backend/cuda/compiled.cpp
@@ -267,7 +267,8 @@ void Compiled::eval_gpu(
      }
    }

-    return std::make_pair(std::move(builder.os), std::move(kernel_names));
+    return std::make_tuple(
+        false, std::move(builder.os), std::move(kernel_names));
  });

  // Collapse contiguous dims to route to a faster kernel if possible. Also