Mirror of https://github.com/ml-explore/mlx.git (synced 2025-11-01 08:38:12 +08:00)
Fix the order of hosts in the ring (#2718)
Some checks failed
Nightly Build / build_linux_release (3.10) (push) Has been cancelled
Nightly Build / build_linux_release (3.14) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.10) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.11) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.12) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.13) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.14) (push) Has been cancelled
Nightly Build / build_mac_release (3.10) (push) Has been cancelled
Nightly Build / build_mac_release (3.13) (push) Has been cancelled
Nightly Build / build_cuda_with_tests (push) Has been cancelled
Nightly Build / build_cuda_release (push) Has been cancelled
Some checks failed
Nightly Build / build_linux_release (3.10) (push) Has been cancelled
Nightly Build / build_linux_release (3.14) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.10) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.11) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.12) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.13) (push) Has been cancelled
Nightly Build / build_linux_with_tests (3.14) (push) Has been cancelled
Nightly Build / build_mac_release (3.10) (push) Has been cancelled
Nightly Build / build_mac_release (3.13) (push) Has been cancelled
Nightly Build / build_cuda_with_tests (push) Has been cancelled
Nightly Build / build_cuda_release (push) Has been cancelled
This commit is contained in:
Committed by: GitHub
Parent: 68c5fa1c95
Commit: b901a9f311
@@ -636,9 +636,17 @@ def prepare_tb_ring(args, hosts):
|
|||||||
if ip0 > 255:
|
if ip0 > 255:
|
||||||
raise ValueError("Ran out of available local IPs for the ring")
|
raise ValueError("Ran out of available local IPs for the ring")
|
||||||
|
|
||||||
|
# Extract the host order from the first ring
|
||||||
|
hostmap = dict((r[0][0], r[1][0]) for r in rings[0])
|
||||||
|
first_host = min(hostmap.keys())
|
||||||
|
order = [first_host]
|
||||||
|
while hostmap[order[-1]] != first_host:
|
||||||
|
order.append(hostmap[order[-1]])
|
||||||
|
|
||||||
# Create the hostfile
|
# Create the hostfile
|
||||||
hostfile = []
|
hostfile = []
|
||||||
for i, h in enumerate(hosts):
|
for i in order:
|
||||||
|
h = hosts[i]
|
||||||
host = {
|
host = {
|
||||||
"ssh": h.ssh_hostname,
|
"ssh": h.ssh_hostname,
|
||||||
"ips": [
|
"ips": [
|
||||||
|
|||||||
Reference in New Issue
Block a user