Fix compilation with CUDA 11 (#2331)

2025-12-16 01:49:05 +08:00 · 2025-07-08 12:00:43 +09:00
parent 4a9b29a875
commit 2ca533b279
11 changed files with 115 additions and 56 deletions
--- a/mlx/backend/cuda/softmax.cu
+++ b/mlx/backend/cuda/softmax.cu
@@ -43,7 +43,7 @@ __global__ void softmax(const T* in, T* out, int axis_size) {
  // Thread reduce.
  AccT prevmax;
  AccT maxval = Limits<AccT>::finite_min();
-  AccT normalizer = 0;
+  AccT normalizer = cast_to<AccT>(0);
  for (int r = 0; r < cuda::ceil_div(axis_size, BLOCK_DIM * N_READS); r++) {
    AccT vals[N_READS];
    cub::LoadDirectBlocked(