Compare commits

...

3 Commits

Author SHA1 Message Date
Cheng
0f5ce173da [CUDA] --compress-mode requires CUDA 12.8 (#2407) 2025-07-23 06:11:11 -07:00
Cheng
588854195f Remove unused code in Convolution::vjp (#2408) 2025-07-23 06:11:00 -07:00
Fangjun Kuang
28d068bce6 Fix an error in the comment for mx.dequantize (#2409) 2025-07-23 06:10:50 -07:00
3 changed files with 4 additions and 18 deletions

View File

@@ -88,7 +88,9 @@ endif()
target_compile_options(
mlx PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:--Wno-deprecated-gpu-targets>")
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.4.0)
# Use stronger binary compression. This feature was introduced in CUDA 12.8
# and requires drivers released after CUDA 12.4.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
target_compile_options(
mlx PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:--compress-mode=size>")
endif()

View File

@@ -1271,19 +1271,6 @@ std::vector<array> Convolution::vjp(
has_neg_padding |= (pd < 0);
}
auto padding_lo_ = std::vector<int>(padding_lo);
auto padding_hi_ = std::vector<int>(padding_hi);
// Use negative padding on the gradient output
if (has_neg_padding) {
for (auto& p : padding_lo_) {
p = std::max(0, p);
}
for (auto& p : padding_hi_) {
p = std::max(0, p);
}
}
auto wt_trans = group_transpose(wt, 0, 1, -1);
auto grad = conv_general(
/* const array& input = */ cotan,
@@ -1305,12 +1292,9 @@ std::vector<array> Convolution::vjp(
for (int i = 0; i < grad.ndim() - 2; i++) {
if (padding_lo[i] < 0) {
starts[i + 1] -= padding_lo[i];
padding_lo[i] = 0;
}
if (padding_hi[i] < 0) {
stops[i + 1] += padding_hi[i];
padding_hi[i] = 0;
}
}

View File

@@ -4258,7 +4258,7 @@ void init_ops(nb::module_& m) {
.. math::
w_i = s \hat{w_i} - \beta
w_i = s \hat{w_i} + \beta
Args:
w (array): Matrix to be quantized