mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 17:31:16 +08:00
Only fail when 10 consecutive socket errors occur (#1928)
This commit is contained in:
parent
fd0d63ba5b
commit
0792ff02ff
@ -199,6 +199,7 @@ class SocketThread {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void worker() {
|
void worker() {
|
||||||
|
int error_count = 0;
|
||||||
bool delete_recv = false;
|
bool delete_recv = false;
|
||||||
bool delete_send = false;
|
bool delete_send = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -235,10 +236,11 @@ class SocketThread {
|
|||||||
task.buffer = static_cast<char*>(task.buffer) + r;
|
task.buffer = static_cast<char*>(task.buffer) + r;
|
||||||
task.size -= r;
|
task.size -= r;
|
||||||
delete_recv = task.size == 0;
|
delete_recv = task.size == 0;
|
||||||
|
error_count = 0;
|
||||||
} else if (errno != EAGAIN) {
|
} else if (errno != EAGAIN) {
|
||||||
|
error_count++;
|
||||||
log_info(
|
log_info(
|
||||||
true, "Receiving from socket", fd_, "failed with errno", errno);
|
true, "Receiving from socket", fd_, "failed with errno", errno);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!sends_.empty()) {
|
if (!sends_.empty()) {
|
||||||
@ -248,11 +250,17 @@ class SocketThread {
|
|||||||
task.buffer = static_cast<char*>(task.buffer) + r;
|
task.buffer = static_cast<char*>(task.buffer) + r;
|
||||||
task.size -= r;
|
task.size -= r;
|
||||||
delete_send = task.size == 0;
|
delete_send = task.size == 0;
|
||||||
|
error_count = 0;
|
||||||
} else if (errno != EAGAIN) {
|
} else if (errno != EAGAIN) {
|
||||||
|
error_count++;
|
||||||
log_info(true, "Sending to socket", fd_, "failed with errno", errno);
|
log_info(true, "Sending to socket", fd_, "failed with errno", errno);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (error_count >= 10) {
|
||||||
|
log_info(true, "Too many send/recv errors. Aborting...");
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,7 +112,12 @@ def extract_rings(hosts, index):
|
|||||||
break
|
break
|
||||||
if not ring:
|
if not ring:
|
||||||
break
|
break
|
||||||
rings.append(normalize(concretize(ring, used_ports)))
|
try:
|
||||||
|
rings.append(normalize(concretize(ring, used_ports)))
|
||||||
|
except RuntimeError:
|
||||||
|
if len(rings) > 0:
|
||||||
|
return rings
|
||||||
|
raise
|
||||||
|
|
||||||
return rings
|
return rings
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user