Fix FP8 saturation: use 0x7E instead of fp8_max as the saturated result in ToFP8

This commit is contained in:
Awni Hannun
2025-10-21 12:15:44 -07:00
parent 9b1ee2df33
commit c1637e73e0
2 changed files with 2 additions and 2 deletions

View File

@@ -140,7 +140,7 @@ struct ToFP8 {
auto result_high = Simd<uint8_t, N>(f_bits_high >> 20); auto result_high = Simd<uint8_t, N>(f_bits_high >> 20);
result = select(f_bits < (121 << 23), result_low, result_high); result = select(f_bits < (121 << 23), result_low, result_high);
auto result_sat = Simd<uint8_t, N>(fp8_max); auto result_sat = Simd<uint8_t, N>(0x7E);
result = select(f_bits >= fp8_max, result_sat, result); result = select(f_bits >= fp8_max, result_sat, result);
return result | Simd<uint8_t, N>(sign >> 24); return result | Simd<uint8_t, N>(sign >> 24);
} }

View File

@@ -459,7 +459,7 @@ struct ToFP8 {
f_bits ^= sign; f_bits ^= sign;
if (f_bits >= fp8_max) { if (f_bits >= fp8_max) {
// Default behavior saturates to min/max // Default behavior saturates to min/max
result = fp8_max; result = 0x7E;
} else { } else {
if (f_bits < (121 << 23)) { if (f_bits < (121 << 23)) {
f_bits = f_bits =