Files
mlx/mlx/backend/metal/distributed.cpp
Anastasiia Filippova 27778156dc Nccl reduce scatter, all gather (#2727)
* Added reduce scatter and all gather for nccl

* fix unused import, delete unused file

* small fix

* deleted useless condition

* fixed comments

* fix bug in eval_gpu, renamed to sum_scatter, fix docs

* final fix docs

* remove and

* Update mlx/distributed/mpi/mpi.cpp

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>

* fix broken set input output

* fixes set output

* typo

* fix typo

* no cpu, no gpu for reduce scatter

---------

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>
2025-11-05 08:21:11 -08:00

39 lines
1.2 KiB
C++

// Copyright © 2024 Apple Inc.
#include <cassert>
#include "mlx/allocator.h"
#include "mlx/backend/common/utils.h"
#include "mlx/backend/gpu/copy.h"
#include "mlx/backend/metal/device.h"
#include "mlx/backend/metal/utils.h"
#include "mlx/distributed/ops.h"
#include "mlx/distributed/primitives.h"
#include "mlx/fence.h"
#include "mlx/scheduler.h"
namespace mlx::core::distributed {
void AllReduce::eval_gpu(const std::vector<array>&, std::vector<array>&) {
throw std::runtime_error("[AllReduce::eval_gpu] has no GPU implementation.");
}
void AllGather::eval_gpu(const std::vector<array>&, std::vector<array>&) {
throw std::runtime_error("[AllGather::eval_gpu] has no GPU implementation.");
}
void Send::eval_gpu(const std::vector<array>&, std::vector<array>&) {
throw std::runtime_error("[Send::eval_gpu] has no GPU implementation.");
}
void Recv::eval_gpu(const std::vector<array>&, std::vector<array>&) {
throw std::runtime_error("[Recv::eval_gpu] has no GPU implementation.");
}
void ReduceScatter::eval_gpu(const std::vector<array>&, std::vector<array>&) {
throw std::runtime_error(
"[ReduceScatter::eval_gpu] has no GPU implementation.");
}
} // namespace mlx::core::distributed