Fix q4_1 dequantization

2025-12-16 00:18:52 +08:00 · 2024-01-10 10:17:13 -05:00
parent fe34f6ec5c
commit 55d6267c31
1 changed file with 1 addition and 1 deletion
--- a/gguflib.c
+++ b/gguflib.c
@@ -834,7 +834,7 @@ void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_flo
        /* For each block get the scale and convert all the
         * weights in the block. */
        float scale = from_half(*((uint16_t*)block));
-        float bias = from_half(*((uint16_t*)block+2));
+        float bias = from_half(*((uint16_t*)block+1));
        /* First 16 weights are in the lower bits */
        for (uint32_t j = 0; j < 16; j++) {
            uint8_t value = block[j+4]; // j+4 to skip the 2-byte scale and 2-byte bias.