Merge pull request #16 from jart/ftz

Remove flush to zero from bf16
2025-12-16 00:18:52 +08:00 · 2025-01-09 16:46:11 +01:00
parent bd64d6e812 918234ce80
commit 8fa6eb6523
1 changed file with 0 additions and 8 deletions
--- a/bf16.h
+++ b/bf16.h
@@ -52,10 +52,6 @@ static inline float from_brain(uint16_t h) {
 /**
 * Converts float32 to brain16.
 *
 * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
 * Subnormals shall be flushed to zero, and NANs will be quiet.
 * This code should vectorize nicely if using modern compilers.
 */
 static inline uint16_t to_brain(float s) {
    uint16_t h;
@@ -68,10 +64,6 @@ static inline uint16_t to_brain(float s) {
        h = (u.i >> 16) | 64; /* force to quiet */
        return h;
    }
    if (!(u.i & 0x7f800000)) { /* subnormal */
        h = (u.i & 0x80000000) >> 16; /* flush to zero */
        return h;
    }
    return (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
 }