mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
fix saturate
This commit is contained in:
@@ -140,7 +140,7 @@ struct ToFP8 {
|
||||
auto result_high = Simd<uint8_t, N>(f_bits_high >> 20);
|
||||
result = select(f_bits < (121 << 23), result_low, result_high);
|
||||
|
||||
-auto result_sat = Simd<uint8_t, N>(fp8_max);
|
||||
+auto result_sat = Simd<uint8_t, N>(0x7E);
|
||||
result = select(f_bits >= fp8_max, result_sat, result);
|
||||
return result | Simd<uint8_t, N>(sign >> 24);
|
||||
}
|
||||
|
||||
@@ -459,7 +459,7 @@ struct ToFP8 {
|
||||
f_bits ^= sign;
|
||||
if (f_bits >= fp8_max) {
|
||||
// Default behavior saturates to min/max
|
||||
-result = fp8_max;
|
||||
+result = 0x7E;
|
||||
} else {
|
||||
if (f_bits < (121 << 23)) {
|
||||
f_bits =
|
||||
|
||||
Reference in New Issue
Block a user