mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-18 18:28:12 +08:00
Make sure 0 is represented in the quantization (#1016)
This commit is contained in:

committed by
GitHub

parent
ed83908931
commit
84d61d27aa
@@ -3268,6 +3268,9 @@ std::tuple<array, array, array> quantize(
|
||||
array scales = squeeze(delta, -1, s);
|
||||
array biases = squeeze(w_min, -1, s);
|
||||
|
||||
// Round the biases to an integer multiple of the scales so that 0 is
// represented exactly in the resulting quantization
|
||||
biases = multiply(round(divide(biases, scales, s), s), scales, s);
|
||||
|
||||
// Quantize and pack w
|
||||
packed_w =
|
||||
astype(round(divide(subtract(packed_w, w_min, s), delta, s), s), uint32);
|
||||
|
Reference in New Issue
Block a user