Some overhead reductions in mx.fast.metal_kernel (#1437)

* some overhead reductions
* fix
* use +=
* use more +=
2025-10-22 02:58:16 +08:00 · 2024-09-25 17:25:21 -07:00
parent 4f9f9ebb6f
commit 0b4a58699e
3 changed files with 130 additions and 101 deletions
--- a/mlx/backend/metal/custom_kernel.cpp
+++ b/mlx/backend/metal/custom_kernel.cpp
@@ -49,7 +49,7 @@ void CustomKernel::eval_gpu(
  int index = 0;
  for (int i = 0; i < checked_inputs.size(); i++) {
    const array& in = checked_inputs[i];
-    auto shape_info = shape_infos_[i];
+    auto& shape_info = shape_infos_[i];
    compute_encoder.set_input_array(in, index);
    index++;
    if (in.ndim() > 0) {
@@ -68,7 +68,7 @@ void CustomKernel::eval_gpu(
      }
    }
  }
-  for (array out : outputs) {
+  for (auto& out : outputs) {
    compute_encoder.set_output_array(out, index);
    index++;
  }