mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Some checks failed
Nightly Build / build_linux_release (3.10) (push) Has been cancelled
Nightly Build / build_linux_release (3.14) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.10) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.11) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.12) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.13) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.14) (push) Has been cancelled
Nightly Build / build_mac_release (3.10) (push) Has been cancelled
Nightly Build / build_mac_release (3.13) (push) Has been cancelled
Nightly Build / build_cuda_with_tests (push) Has been cancelled
Nightly Build / build_cuda_release (push) Has been cancelled
Nightly Build / Linux Fedora CPP Build (aarch64) (push) Has been cancelled
Nightly Build / Linux Fedora CPP Build (x86_64) (push) Has been cancelled
* Use async cuda malloc managed with cuda 13 * add pool threshold * refactor for regular cuda malloc * load eval gpu for cuda * remove use of cuda pool, use cuda free async * fix * fix * fix * fix * fix + comment
25 lines
620 B
C++
25 lines
620 B
C++
// Copyright © 2024 Apple Inc.
|
|
|
|
#include "mlx/backend/common/utils.h"
|
|
|
|
namespace mlx::core {
|
|
|
|
void broadcast(const array& in, array& out) {
|
|
if (out.size() == 0) {
|
|
out.set_data(allocator::malloc(0));
|
|
return;
|
|
}
|
|
Strides strides(out.ndim(), 0);
|
|
int diff = out.ndim() - in.ndim();
|
|
for (int i = in.ndim() - 1; i >= 0; --i) {
|
|
strides[i + diff] = (in.shape()[i] == 1) ? 0 : in.strides()[i];
|
|
}
|
|
auto flags = in.flags();
|
|
if (out.size() > in.size()) {
|
|
flags.row_contiguous = flags.col_contiguous = false;
|
|
}
|
|
out.copy_shared_buffer(in, strides, flags, in.data_size());
|
|
}
|
|
|
|
} // namespace mlx::core
|