Refactor reductions and fix scatter atomics for large sizes (#1300)

Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
Awni Hannun
2024-08-22 16:03:31 -07:00
committed by GitHub
parent f9e00efe31
commit 98b6ce3460
18 changed files with 1584 additions and 1235 deletions

View File

@@ -104,6 +104,33 @@ inline auto collapse_contiguous_dims(Arrays&&... xs) {
std::vector<array>{std::forward<Arrays>(xs)...});
}
// Collapse adjacent dimensions of a single (shape, strides) pair.
//
// Walking the dimensions from first to last, dimension `i` is folded into the
// most recently emitted collapsed dimension when both conditions hold:
//   1. strides[i] * shape[i] equals the stride of that collapsed dimension,
//      i.e. stepping once along it is the same as traversing all of
//      dimension `i`; and
//   2. the merged extent still fits in an `int`.
// Otherwise dimension `i` starts a new collapsed dimension.
//
// Parameters:
//   shape   - extent of each dimension.
//   strides - stride of each dimension; indexed for every index of `shape`,
//             so it must hold at least `shape.size()` entries.
//
// Returns a tuple (collapsed_shape, collapsed_strides). Both vectors are
// empty when `shape` is empty.
inline std::tuple<std::vector<int>, std::vector<size_t>>
collapse_contiguous_dims(
    const std::vector<int>& shape,
    const std::vector<size_t>& strides) {
  std::vector<int> collapsed_shape;
  std::vector<size_t> collapsed_strides;

  if (!shape.empty()) {
    // Largest extent a collapsed dimension may have, since shapes are ints.
    constexpr size_t max_extent =
        static_cast<size_t>(std::numeric_limits<int>::max());

    collapsed_shape.push_back(shape[0]);
    collapsed_strides.push_back(strides[0]);

    for (size_t i = 1; i < shape.size(); ++i) {
      const bool contiguous_with_prev =
          strides[i] * shape[i] == collapsed_strides.back();
      // Evaluate the product in size_t so the range check cannot overflow.
      const bool fits_in_int =
          collapsed_shape.back() * static_cast<size_t>(shape[i]) <= max_extent;

      if (contiguous_with_prev && fits_in_int) {
        // Merge: the extent grows and the finer (inner) stride takes over.
        collapsed_shape.back() *= shape[i];
        collapsed_strides.back() = strides[i];
      } else {
        collapsed_shape.push_back(shape[i]);
        collapsed_strides.push_back(strides[i]);
      }
    }
  }

  // Move the locals into the tuple; make_tuple on lvalues would copy both.
  return std::make_tuple(
      std::move(collapsed_shape), std::move(collapsed_strides));
}
template <typename stride_t>
inline auto check_contiguity(
const std::vector<int>& shape,