[CUDA] Matmul utils initial commit (#2441)

This commit is contained in:
Angelos Katharopoulos
2025-08-01 14:22:25 -07:00
committed by GitHub
parent 86258f292f
commit be9bc96da4
32 changed files with 856 additions and 14 deletions

View File

@@ -230,7 +230,7 @@ void col_reduce_looped(
auto kernel =
cu::col_reduce_looped<T, U, OP, reduce_ndim(), BM, BN, N_READS>;
encoder.add_kernel_node(
- kernel, grid, blocks, indata, out.data<U>(), args);
+ kernel, grid, blocks, 0, indata, out.data<U>(), args);
});
});
});