Refactor reductions and fix scatter atomics for large sizes (#1300)

Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
2025-12-16 01:49:05 +08:00 · 2024-08-22 16:03:31 -07:00
parent f9e00efe31
commit 98b6ce3460
18 changed files with 1584 additions and 1235 deletions
--- a/mlx/backend/common/utils.h
+++ b/mlx/backend/common/utils.h
@@ -104,6 +104,33 @@ inline auto collapse_contiguous_dims(Arrays&&... xs) {
      std::vector<array>{std::forward<Arrays>(xs)...});
 }

+/* The single array version of the above.
+ *
+ * Walks the dimensions from first to last and merges dimension i into the
+ * previously emitted (collapsed) dimension whenever
+ *   strides[i] * shape[i] == stride of the previous collapsed dimension
+ * and the merged extent still fits in an int. Otherwise dimension i starts
+ * a new collapsed dimension.
+ *
+ * shape:   extent of each dimension.
+ * strides: stride of each dimension; indexed in lockstep with shape. No
+ *          size check is performed here, so it must have at least
+ *          shape.size() entries.
+ *
+ * Returns a tuple (collapsed_shape, collapsed_strides). Both vectors are
+ * empty when shape is empty.
+ */
+inline std::tuple<std::vector<int>, std::vector<size_t>>
+collapse_contiguous_dims(
+    const std::vector<int>& shape,
+    const std::vector<size_t>& strides) {
+  std::vector<int> collapsed_shape;
+  std::vector<size_t> collapsed_strides;
+
+  if (shape.size() > 0) { // empty shape: both results stay empty
+    collapsed_shape.push_back(shape[0]); // first dim always seeds the result
+    collapsed_strides.push_back(strides[0]);
+    for (int i = 1; i < shape.size(); i++) {
+      if (strides[i] * shape[i] != collapsed_strides.back() || // dim i does not tile the previous collapsed dim
+          collapsed_shape.back() * static_cast<size_t>(shape[i]) > // product evaluated as size_t via the cast
+              std::numeric_limits<int>::max()) { // merged extent would not fit in int
+        collapsed_shape.push_back(shape[i]); // start a new collapsed dim
+        collapsed_strides.push_back(strides[i]);
+      } else {
+        collapsed_shape.back() *= shape[i]; // fold dim i into the previous collapsed dim
+        collapsed_strides.back() = strides[i]; // merged dim takes dim i's stride
+      }
+    }
+  }
+
+  return std::make_tuple(collapsed_shape, collapsed_strides);
+}
+
 template <typename stride_t>
 inline auto check_contiguity(
    const std::vector<int>& shape,