From 2afdf380b181ec877d0fadb77187482fb73f0eaf Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Fri, 22 Aug 2025 09:42:46 -0700
Subject: [PATCH] comment

---
 mlx/backend/cuda/distributed.cu | 6 +++---
 python/mlx/distributed_run.py   | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/mlx/backend/cuda/distributed.cu b/mlx/backend/cuda/distributed.cu
index dba168a68..90eeacd7c 100644
--- a/mlx/backend/cuda/distributed.cu
+++ b/mlx/backend/cuda/distributed.cu
@@ -1,8 +1,8 @@
 // Copyright © 2025 Apple Inc.
 
-#include "mlx/backend/gpu/copy.h"
 #include "mlx/backend/cuda/device.h"
 #include "mlx/backend/cuda/kernel_utils.cuh"
+#include "mlx/backend/gpu/copy.h"
 #include "mlx/distributed/primitives.h"
 #include "mlx/primitives.h"
 
@@ -15,8 +15,8 @@ void AllReduce::eval_gpu(
   assert(inputs.size() == 1);
   assert(outputs.size() == 1);
 
-
-  auto set_input_output = [s = stream()](const array& in, array& out) -> std::pair<array, array> {
+  auto set_input_output =
+      [s = stream()](const array& in, array& out) -> std::pair<array, array> {
     if (!in.flags().row_contiguous) {
       copy_gpu(in, out, CopyType::General, s);
       return {out, out};
diff --git a/python/mlx/distributed_run.py b/python/mlx/distributed_run.py
index 31274d4a9..bb0e3c633 100644
--- a/python/mlx/distributed_run.py
+++ b/python/mlx/distributed_run.py
@@ -56,7 +56,7 @@ def parse_hardware_ports(ports_string):
 
 
 def get_num_nvidia_gpus():
-    result = run(['nvidia-smi', "-L"], capture_output=True, text=True, check=True)
+    result = run(["nvidia-smi", "-L"], capture_output=True, text=True, check=True)
     return len(result.stdout.strip().split("\n"))
 
 
@@ -433,7 +433,9 @@ def launch_nccl(parser, hosts, args, command):
     base_env = os.environ.copy()
     base_env.update(
         {
-            "NCCL_DEBUG": base_env.get("NCCL_DEBUG", "DEBUG"),
+            "NCCL_DEBUG": base_env.get(
+                "NCCL_DEBUG", "INFO" if args.verbose else "DEBUG"
+            ),
             "NCCL_SOCKET_IFNAME": "lo",  # Use loopback for local communication
             "NCCL_HOST_IP": master_host,
             "NCCL_PORT": str(master_port),
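
Note on the final hunk: the launcher now picks the NCCL debug level from two
inputs, an NCCL_DEBUG already set in the caller's environment and the
launcher's verbose flag. Below is a minimal runnable sketch (not part of the
patch) of that precedence; nccl_debug_value is a hypothetical helper and its
verbose parameter stands in for args.verbose.

def nccl_debug_value(env, verbose):
    # An NCCL_DEBUG the caller already exported always wins; otherwise
    # the launcher defaults to INFO when verbose and DEBUG when not.
    return env.get("NCCL_DEBUG", "INFO" if verbose else "DEBUG")

assert nccl_debug_value({}, verbose=True) == "INFO"
assert nccl_debug_value({}, verbose=False) == "DEBUG"
assert nccl_debug_value({"NCCL_DEBUG": "WARN"}, verbose=True) == "WARN"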