mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-22 13:28:11 +08:00
Add mode parameter for quantization (#2499)
* add mode parameter for quantization * mxfp4 quantize/dequantize + start of optional biases * mxfp4 works * speedup * cpu mxfp4 * fix * fix test tol * fix * refactor * add quant mode enum
This commit is contained in:
@@ -198,6 +198,12 @@ class TestBase(mlx_tests.MLXTestCase):
|
||||
self.assertTrue(isinstance(m.layers[1], nn.ReLU))
|
||||
self.assertTrue(isinstance(m.layers[2], nn.QuantizedLinear))
|
||||
|
||||
nn.quantize(m, group_size=32, mode="mxfp4")
|
||||
self.assertTrue(isinstance(m.layers[0], nn.QuantizedEmbedding))
|
||||
self.assertTrue(isinstance(m.layers[1], nn.ReLU))
|
||||
self.assertTrue(isinstance(m.layers[2], nn.QuantizedLinear))
|
||||
self.assertTrue(isinstance(m.layers[2].scales, mx.array))
|
||||
|
||||
def test_quantize_freeze(self):
|
||||
lin = nn.Linear(512, 512)
|
||||
qlin = lin.to_quantized()
|
||||
|
Reference in New Issue
Block a user