mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-01 04:24:36 +08:00
Quantize with groups of 32 (#511)
* allow quantize with group sizes of 32 * missing cpu dispatch * remove print * Fix qvm for group_size 32 --------- Co-authored-by: Angelos Katharopoulos <a_katharopoulos@apple.com>
This commit is contained in:
@@ -72,6 +72,9 @@ def _quant_matmul(x, w, s, b, transpose, group_size, bits):
|
||||
|
||||
|
||||
quant_matmul = {
|
||||
"quant_matmul_32_2": partial(_quant_matmul, transpose=False, group_size=32, bits=2),
|
||||
"quant_matmul_32_4": partial(_quant_matmul, transpose=False, group_size=32, bits=4),
|
||||
"quant_matmul_32_8": partial(_quant_matmul, transpose=False, group_size=32, bits=8),
|
||||
"quant_matmul_64_2": partial(_quant_matmul, transpose=False, group_size=64, bits=2),
|
||||
"quant_matmul_64_4": partial(_quant_matmul, transpose=False, group_size=64, bits=4),
|
||||
"quant_matmul_64_8": partial(_quant_matmul, transpose=False, group_size=64, bits=8),
|
||||
@@ -84,6 +87,15 @@ quant_matmul = {
|
||||
"quant_matmul_128_8": partial(
|
||||
_quant_matmul, transpose=False, group_size=128, bits=8
|
||||
),
|
||||
"quant_matmul_t_32_2": partial(
|
||||
_quant_matmul, transpose=True, group_size=32, bits=2
|
||||
),
|
||||
"quant_matmul_t_32_4": partial(
|
||||
_quant_matmul, transpose=True, group_size=32, bits=4
|
||||
),
|
||||
"quant_matmul_t_32_8": partial(
|
||||
_quant_matmul, transpose=True, group_size=32, bits=8
|
||||
),
|
||||
"quant_matmul_t_64_2": partial(
|
||||
_quant_matmul, transpose=True, group_size=64, bits=2
|
||||
),
|
||||
|
Reference in New Issue
Block a user