mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
[CUDA] Matmul utils initial commit (#2441)
This commit is contained in:
committed by
GitHub
parent
86258f292f
commit
be9bc96da4
@@ -261,6 +261,7 @@ void affine_quantize(
|
||||
kernel,
|
||||
num_blocks,
|
||||
block_dims,
|
||||
0,
|
||||
w.data<T>(),
|
||||
wq.data<uint8_t>(),
|
||||
scales.data<T>(),
|
||||
@@ -316,6 +317,7 @@ void affine_dequantize(
|
||||
kernel,
|
||||
num_blocks,
|
||||
block_dims,
|
||||
0,
|
||||
wq.data<uint8_t>(),
|
||||
scales.data<T>(),
|
||||
biases.data<T>(),
|
||||
|
||||
Reference in New Issue
Block a user