Compare commits

...

2 Commits

Author SHA1 Message Date
Awni Hannun
c1637e73e0 fix saturate 2025-10-21 12:15:44 -07:00
Awni Hannun
9b1ee2df33 fix no gpu/cpu 2025-10-21 12:04:53 -07:00
4 changed files with 4 additions and 2 deletions

View File

@@ -140,7 +140,7 @@ struct ToFP8 {
auto result_high = Simd<uint8_t, N>(f_bits_high >> 20);
result = select(f_bits < (121 << 23), result_low, result_high);
- auto result_sat = Simd<uint8_t, N>(fp8_max);
+ auto result_sat = Simd<uint8_t, N>(0x7E);
result = select(f_bits >= fp8_max, result_sat, result);
return result | Simd<uint8_t, N>(sign >> 24);
}

View File

@@ -459,7 +459,7 @@ struct ToFP8 {
f_bits ^= sign;
if (f_bits >= fp8_max) {
// Default behavior saturates to min/max
- result = fp8_max;
+ result = 0x7E;
} else {
if (f_bits < (121 << 23)) {
f_bits =

View File

@@ -130,6 +130,7 @@ NO_CPU(View)
namespace fast {
NO_CPU_MULTI(Quantize)
+ NO_CPU_MULTI(ConvertFP8)
} // namespace fast
namespace distributed {

View File

@@ -154,6 +154,7 @@ NO_GPU_USE_FALLBACK(RMSNorm)
NO_GPU_MULTI(RMSNormVJP)
NO_GPU_USE_FALLBACK(RoPE)
NO_GPU(ScaledDotProductAttention)
+ NO_GPU_MULTI(ConvertFP8)
NO_GPU_MULTI(Quantize)
NO_GPU_MULTI(CustomKernel)
} // namespace fast