From 55d6267c3133349d0e1b19565c12d73bad19d878 Mon Sep 17 00:00:00 2001
From: Juarez Bochi <jbochi@gmail.com>
Date: Wed, 10 Jan 2024 10:17:13 -0500
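Subject: [PATCH] Fix q4_1 dequantization

Each q4_1 block starts with two half-precision floats: the scale at
uint16_t index 0 and the bias at uint16_t index 1. The bias was being
read from uint16_t index 2 (byte offset 4), which is where the packed
4-bit weights begin, so the first two weight bytes were interpreted as
the bias. Read the bias from index 1 instead.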
---
 gguflib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gguflib.c b/gguflib.c
index 6f7aac2..7aa2696 100644
--- a/gguflib.c
+++ b/gguflib.c
@@ -834,7 +834,7 @@ void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_flo
         /* For each block get the scale and convert all the
          * weights in the block. */
         float scale = from_half(*((uint16_t*)block));
-        float bias = from_half(*((uint16_t*)block+2));
+        float bias = from_half(*((uint16_t*)block+1));
         /* First 16 weights are in the lower bits */
         for (uint32_t j = 0; j < 16; j++) {
             uint8_t value = block[j+4]; // j+2 to skip the scale and bias bytes.