Mirror of https://github.com/ml-explore/mlx.git (synced 2025-11-04 02:28:13 +08:00)
	Fused Affine Quantize/Dequantize ops (#1282)
* Add fast affine dequantize
* add full quantize kernel
* fused kernel with scale/bias computation
* fix docstring
* fix no jit error
* fix test
* test fix
* reduce fast api to only affine_quantize
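The fused kernels compute affine (scale/bias) group quantization. For orientation only, here is a minimal NumPy sketch of that scheme; the per-group min/max formulation and the helper names below are assumptions for illustration, not the kernel source added in this commit.

import numpy as np

# Hypothetical reference for affine group quantization, assuming the usual
# per-group min/max scale and bias.
def affine_quantize_ref(w, group_size=64, bits=4):
    groups = w.reshape(-1, group_size)
    levels = 2**bits - 1
    w_min = groups.min(axis=1, keepdims=True)
    w_max = groups.max(axis=1, keepdims=True)
    scales = (w_max - w_min) / levels          # step size between quantized levels
    biases = w_min                             # offset added back on dequantize
    q = np.clip(np.round((groups - biases) / scales), 0, levels)
    return q, scales, biases

def affine_dequantize_ref(q, scales, biases):
    return scales * q + biases                 # w_hat = scale * q + bias, per group

w = np.random.normal(size=(128, 512)).astype(np.float32)
q, scales, biases = affine_quantize_ref(w, group_size=64, bits=4)
w_hat = affine_dequantize_ref(q, scales, biases)
# Each element is rounded to the nearest quantization level, so the
# per-element reconstruction error never exceeds the group's scale --
# the same bound the test in the diff below asserts.
assert np.all(np.abs(w.reshape(-1, 64) - w_hat) <= scales + 1e-6)

Because adjacent levels are spaced one scale apart, bounding |w - w_hat| by the per-group scale (plus a small eps) is a safe correctness check regardless of group size or bit width.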
@@ -12,11 +12,12 @@ class TestQuantized(mlx_tests.MLXTestCase):
         w = mx.random.normal(shape=(128, 512))
         for gs in [32, 64, 128]:
             for b in [2, 4, 8]:
-                w_q, scales, biases = mx.quantize(w, gs, b)
-                w_hat = mx.dequantize(w_q, scales, biases, gs, b)
-                errors = (w - w_hat).abs().reshape(*scales.shape, -1)
-                eps = 1e-6
-                self.assertTrue((errors <= (scales[..., None] + eps).abs()).all())
+                with self.subTest(gs=gs, b=b):
+                    w_q, scales, biases = mx.quantize(w, group_size=gs, bits=b)
+                    w_hat = mx.dequantize(w_q, scales, biases, gs, b)
+                    errors = (w - w_hat).abs().reshape(*scales.shape, -1)
+                    eps = 1e-6
+                    self.assertTrue((errors <= (scales[..., None] + eps).abs()).all())
 
         # test quantize/dequantize 0s
         a = mx.zeros((256, 512))
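The test refactor wraps each (group_size, bits) combination in self.subTest, so a failing combination is reported with its parameters instead of aborting the whole loop. A standalone sketch of that unittest pattern; the class name and assertion here are illustrative, not taken from the MLX test suite.

import unittest

class QuantizeParamsExample(unittest.TestCase):
    def test_all_combinations(self):
        for gs in [32, 64, 128]:
            for b in [2, 4, 8]:
                # Each (gs, b) pair becomes its own reported sub-test;
                # one failing pair no longer hides the remaining pairs.
                with self.subTest(gs=gs, b=b):
                    self.assertEqual(gs % b, 0)

if __name__ == "__main__":
    unittest.main()

With subTest, a run over all nine combinations lists every failing pair in one pass, which makes regressions in a single (group_size, bits) configuration much easier to spot.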