[CUDA] Matmul utils initial commit (#2441)

This commit is contained in:
Angelos Katharopoulos
2025-08-01 14:22:25 -07:00
committed by GitHub
parent 86258f292f
commit be9bc96da4
32 changed files with 856 additions and 14 deletions

View File

@@ -261,6 +261,7 @@ void affine_quantize(
kernel,
num_blocks,
block_dims,
0,
w.data<T>(),
wq.data<uint8_t>(),
scales.data<T>(),
@@ -316,6 +317,7 @@ void affine_dequantize(
kernel,
num_blocks,
block_dims,
0,
wq.data<uint8_t>(),
scales.data<T>(),
biases.data<T>(),