Add NF4 quantization

2025-11-06 20:20:11 +08:00 · 2024-06-21 10:55:42 -07:00
parent af9079cc1f
commit 152092957c
12 changed files with 530 additions and 212 deletions
--- a/benchmarks/python/comparative/bench_mlx.py
+++ b/benchmarks/python/comparative/bench_mlx.py
@@ -62,10 +62,17 @@ def matmul(x, y):

 def _quant_matmul(x, w, s, b, transpose, group_size, bits):
    ys = []
-    for i in range(10):
+    for i in range(100):
        ys.append(
            mx.quantized_matmul(
-                x, w, s, b, transpose=transpose, group_size=group_size, bits=bits
+                x,
+                w,
+                s,
+                b,
+                transpose=transpose,
+                group_size=group_size,
+                bits=bits,
+                mode=mx.QuantizationMode.DEFAULT,
            )
        )
    mx.eval(ys)