Fix compilation with CUDA 11 (#2331)

This commit is contained in:
Cheng
2025-07-08 12:00:43 +09:00
committed by GitHub
parent 4a9b29a875
commit 2ca533b279
11 changed files with 115 additions and 56 deletions

View File

@@ -43,7 +43,7 @@ __global__ void softmax(const T* in, T* out, int axis_size) {
// Thread reduce.
AccT prevmax;
AccT maxval = Limits<AccT>::finite_min();
- AccT normalizer = 0;
+ AccT normalizer = cast_to<AccT>(0);
for (int r = 0; r < cuda::ceil_div(axis_size, BLOCK_DIM * N_READS); r++) {
AccT vals[N_READS];
cub::LoadDirectBlocked(