Mirror of https://github.com/ml-explore/mlx.git (synced 2025-12-16 01:49:05 +08:00)
redesign for faster cpu/gpu synch (#1869)
* redesign for faster cpu/gpu synch
* load + more async CPU
* use command encoder API and move more ops to use it
* make fence back-end generic + CPU only fence
* faster build
* fix async eval
* fixes + handle temporaries
* fix / improve cpu conv
* remove unused status, fix siblings
* fix extensions
* fix
* fix no cpu build
* format
* comments
* fix perf regression, remove unnecessary abort
* fix events, task limit cpu
* fix waiting
* fix donation / temporaries in normalization
@@ -6,31 +6,25 @@
 #include "mlx/distributed/distributed_impl.h"
 #include "mlx/distributed/mpi/mpi.h"
 #include "mlx/distributed/ring/ring.h"
-#include "mlx/scheduler.h"
 
 namespace mlx::core::distributed {
 
 namespace detail {
 
-Stream communication_stream() {
-  static Stream comm_stream = new_stream(Device::cpu);
-  return comm_stream;
-}
-
-void all_sum(Group group, const array& input, array& output) {
-  group.raw_group()->all_sum(input, output);
+void all_sum(Group group, const array& input, array& output, Stream stream) {
+  group.raw_group()->all_sum(input, output, stream);
 }
 
-void all_gather(Group group, const array& input, array& output) {
-  group.raw_group()->all_gather(input, output);
+void all_gather(Group group, const array& input, array& output, Stream stream) {
+  group.raw_group()->all_gather(input, output, stream);
 }
 
-void send(Group group, const array& input, int dst) {
-  group.raw_group()->send(input, dst);
+void send(Group group, const array& input, int dst, Stream stream) {
+  group.raw_group()->send(input, dst, stream);
 }
 
-void recv(Group group, array& out, int src) {
-  group.raw_group()->recv(out, src);
+void recv(Group group, array& out, int src, Stream stream) {
+  group.raw_group()->recv(out, src, stream);
 }
 
 class EmptyGroup : public GroupImpl {
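This hunk replaces the implicit `detail::communication_stream()` (a dedicated CPU stream) with an explicit `Stream` argument on every collective. As a rough caller-side illustration, the sketch below uses only the new `detail::all_sum` signature shown above; the wrapper `all_sum_on_stream` and the choice of headers are assumptions for the example, not part of the diff.

// Sketch only: a hypothetical wrapper showing how a caller now supplies the
// stream the collective should run on, instead of relying on a dedicated
// CPU communication stream.
#include "mlx/distributed/distributed.h"       // assumed public header
#include "mlx/distributed/distributed_impl.h"  // declares detail::all_sum, etc.

namespace example {

using namespace mlx::core;

// Hypothetical helper: run an all-reduce on the caller's stream.
void all_sum_on_stream(
    distributed::Group group,
    const array& input,
    array& output,
    Stream stream) {
  // New API from the hunk above: the stream is passed through to the backend.
  distributed::detail::all_sum(group, input, output, stream);
}

} // namespace example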
@@ -47,19 +41,19 @@ class EmptyGroup : public GroupImpl {
     throw std::runtime_error("Cannot split the distributed group further.");
   }
 
-  void all_sum(const array& input, array& output) override {
+  void all_sum(const array&, array&, Stream) override {
     throw std::runtime_error(
         "Communication not implemented in an empty distributed group.");
   }
-  void all_gather(const array& input, array& output) override {
+  void all_gather(const array&, array&, Stream) override {
     throw std::runtime_error(
         "Communication not implemented in an empty distributed group.");
   }
-  void send(const array& input, int dst) override {
+  void send(const array&, int, Stream) override {
     throw std::runtime_error(
         "Communication not implemented in an empty distributed group.");
   }
-  void recv(array& out, int src) override {
+  void recv(array&, int, Stream) override {
     throw std::runtime_error(
         "Communication not implemented in an empty distributed group.");
   }
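The `EmptyGroup` hunk also pins down the virtual interface that real backends (MPI, ring) must now implement: each communication override receives the `Stream` on which the transfer should be scheduled. Below is a minimal, non-authoritative sketch of a backend stub under that assumption; only the four overrides changed here are shown, so the class stays abstract, and `enqueue_on` is a hypothetical placeholder for however a real backend dispatches work onto the stream (the commit message mentions the command encoder API).

// Sketch only: the Stream-taking communication overrides, mirroring the
// signatures from the EmptyGroup hunk above. Remaining GroupImpl virtuals
// (rank, size, split, ...) are omitted for brevity.
#include <functional>

#include "mlx/distributed/distributed_impl.h"

namespace example {

using namespace mlx::core;

class StubGroup : public distributed::detail::GroupImpl {
 public:
  void all_sum(const array& input, array& output, Stream stream) override {
    enqueue_on(stream, [&]() { /* reduce input into output */ });
  }
  void all_gather(const array& input, array& output, Stream stream) override {
    enqueue_on(stream, [&]() { /* gather input into output */ });
  }
  void send(const array& input, int dst, Stream stream) override {
    enqueue_on(stream, [&]() { /* send input to rank dst */ });
  }
  void recv(array& out, int src, Stream stream) override {
    enqueue_on(stream, [&]() { /* receive into out from rank src */ });
  }

 private:
  // Hypothetical helper: stands in for scheduling the callback on `stream`;
  // here it simply runs it inline.
  void enqueue_on(Stream, const std::function<void()>& f) {
    f();
  }
};

} // namespace example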
@@ -122,10 +116,6 @@ Group init(bool strict /* = false */, const std::string& bk /* = "any" */) {
     backends.insert({"any", group});
   }
   backends.insert({std::move(bk_), group});
-
-  // Ensure the communication stream is alive before
-  // the graph is evaluated
-  detail::communication_stream();
   return Group(group);
 }
 
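The last hunk drops the bookkeeping that pre-created the CPU communication stream inside `init()`; the user-facing entry point is otherwise unchanged. A minimal usage sketch, assuming only the public `mlx/distributed/distributed.h` header and the `init` signature shown in the hunk header:

// Sketch only: init() keeps the signature shown in the hunk header; the
// stream plumbing introduced by this commit is internal to the backends.
#include <iostream>

#include "mlx/distributed/distributed.h"  // assumed public header

int main() {
  using namespace mlx::core;
  // Group init(bool strict = false, const std::string& bk = "any");
  auto group = distributed::init(/* strict */ false, /* bk */ "any");
  std::cout << "rank " << group.rank() << " of " << group.size() << std::endl;
  return 0;
}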