Fix FP8 saturation: emit 0x7E as the saturated result instead of the fp8_max comparison threshold

This commit is contained in:
Awni Hannun
2025-10-21 12:15:44 -07:00
parent 9b1ee2df33
commit c1637e73e0
2 changed files with 2 additions and 2 deletions

View File

@@ -140,7 +140,7 @@ struct ToFP8 {
auto result_high = Simd<uint8_t, N>(f_bits_high >> 20);
result = select(f_bits < (121 << 23), result_low, result_high);
auto result_sat = Simd<uint8_t, N>(fp8_max);
auto result_sat = Simd<uint8_t, N>(0x7E);
result = select(f_bits >= fp8_max, result_sat, result);
return result | Simd<uint8_t, N>(sign >> 24);
}

View File

@@ -459,7 +459,7 @@ struct ToFP8 {
f_bits ^= sign;
if (f_bits >= fp8_max) {
// Default behavior saturates to min/max
result = fp8_max;
result = 0x7E;
} else {
if (f_bits < (121 << 23)) {
f_bits =