From 2afdf380b181ec877d0fadb77187482fb73f0eaf Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Fri, 22 Aug 2025 09:42:46 -0700
Subject: [PATCH] comment

---
 mlx/backend/cuda/distributed.cu | 6 +++---
 python/mlx/distributed_run.py   | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/mlx/backend/cuda/distributed.cu b/mlx/backend/cuda/distributed.cu
index dba168a68..90eeacd7c 100644
--- a/mlx/backend/cuda/distributed.cu
+++ b/mlx/backend/cuda/distributed.cu
@@ -1,8 +1,8 @@
 // Copyright © 2025 Apple Inc.
 
-#include "mlx/backend/gpu/copy.h"
 #include "mlx/backend/cuda/device.h"
 #include "mlx/backend/cuda/kernel_utils.cuh"
+#include "mlx/backend/gpu/copy.h"
 #include "mlx/distributed/primitives.h"
 #include "mlx/primitives.h"
 
@@ -15,8 +15,8 @@ void AllReduce::eval_gpu(
   assert(inputs.size() == 1);
   assert(outputs.size() == 1);
 
-
-  auto set_input_output = [s = stream()](const array& in, array& out) -> std::pair<array, array> {
+  auto set_input_output =
+      [s = stream()](const array& in, array& out) -> std::pair<array, array> {
     if (!in.flags().row_contiguous) {
       copy_gpu(in, out, CopyType::General, s);
       return {out, out};
diff --git a/python/mlx/distributed_run.py b/python/mlx/distributed_run.py
index 31274d4a9..bb0e3c633 100644
--- a/python/mlx/distributed_run.py
+++ b/python/mlx/distributed_run.py
@@ -56,7 +56,7 @@ def parse_hardware_ports(ports_string):
 
 
 def get_num_nvidia_gpus():
-    result = run(['nvidia-smi', "-L"], capture_output=True, text=True, check=True)
+    result = run(["nvidia-smi", "-L"], capture_output=True, text=True, check=True)
     return len(result.stdout.strip().split("\n"))
 
 
@@ -433,7 +433,9 @@ def launch_nccl(parser, hosts, args, command):
     base_env = os.environ.copy()
     base_env.update(
         {
-            "NCCL_DEBUG": base_env.get("NCCL_DEBUG", "DEBUG"),
+            "NCCL_DEBUG": base_env.get(
+                "NCCL_DEBUG", "INFO" if args.verbose else "DEBUG"
+            ),
             "NCCL_SOCKET_IFNAME": "lo",  # Use loopback for local communication
             "NCCL_HOST_IP": master_host,
             "NCCL_PORT": str(master_port),
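
Note on the final hunk: the launcher now picks the NCCL debug level from two
inputs, an NCCL_DEBUG already set in the caller's environment and the
launcher's verbose flag. Below is a minimal runnable sketch (not part of the
patch) of that precedence; nccl_debug_value is a hypothetical helper and its
verbose parameter stands in for args.verbose.

def nccl_debug_value(env, verbose):
    # An NCCL_DEBUG the caller already exported always wins; otherwise
    # the launcher defaults to INFO when verbose and DEBUG when not.
    return env.get("NCCL_DEBUG", "INFO" if verbose else "DEBUG")

assert nccl_debug_value({}, verbose=True) == "INFO"
assert nccl_debug_value({}, verbose=False) == "DEBUG"
assert nccl_debug_value({"NCCL_DEBUG": "WARN"}, verbose=True) == "WARN"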