Faster indexing math in a few kernels (#1589)

* wip: faster compiled kernels
* faster general unary with uint specialization
* index type in compiled, unary, binary, ternary, copy
* fix jit
* jit fix
* specialize gather + scatter
* nit in docs
2025-12-16 01:49:05 +08:00 · 2024-11-18 19:52:00 -08:00
parent bf481e8e5d
commit 2419edd5b2
25 changed files with 630 additions and 484 deletions
--- a/mlx/backend/common/compiled_cpu.cpp
+++ b/mlx/backend/common/compiled_cpu.cpp
@@ -279,7 +279,7 @@ void Compiled::eval_cpu(

  // Figure out which kernel we are using
  auto& shape = outputs[0].shape();
-  bool contiguous = compiled_check_contiguity(inputs, shape);
+  auto contiguous = compiled_check_contiguity(inputs, shape);

  // Handle all broadcasting and collect function input arguments
  std::vector<void*> args;