Compare commits

...

2 Commits

Author SHA1 Message Date
Awni Hannun
c1637e73e0 fix saturate 2025-10-21 12:15:44 -07:00
Awni Hannun
9b1ee2df33 fix no gpu/cpu 2025-10-21 12:04:53 -07:00
4 changed files with 4 additions and 2 deletions

View File

@@ -140,7 +140,7 @@ struct ToFP8 {
  auto result_high = Simd<uint8_t, N>(f_bits_high >> 20);
  result = select(f_bits < (121 << 23), result_low, result_high);
- auto result_sat = Simd<uint8_t, N>(fp8_max);
+ auto result_sat = Simd<uint8_t, N>(0x7E);
  result = select(f_bits >= fp8_max, result_sat, result);
  return result | Simd<uint8_t, N>(sign >> 24);
  }

View File

@@ -459,7 +459,7 @@ struct ToFP8 {
  f_bits ^= sign;
  if (f_bits >= fp8_max) {
  // Default behavior saturates to min/max
- result = fp8_max;
+ result = 0x7E;
  } else {
  if (f_bits < (121 << 23)) {
  f_bits =

View File

@@ -130,6 +130,7 @@ NO_CPU(View)
  namespace fast {
  NO_CPU_MULTI(Quantize)
+ NO_CPU_MULTI(ConvertFP8)
  } // namespace fast
  namespace distributed {

View File

@@ -154,6 +154,7 @@ NO_GPU_USE_FALLBACK(RMSNorm)
  NO_GPU_MULTI(RMSNormVJP)
  NO_GPU_USE_FALLBACK(RoPE)
  NO_GPU(ScaledDotProductAttention)
+ NO_GPU_MULTI(ConvertFP8)
  NO_GPU_MULTI(Quantize)
  NO_GPU_MULTI(CustomKernel)
  } // namespace fast