From 55d6267c3133349d0e1b19565c12d73bad19d878 Mon Sep 17 00:00:00 2001 From: Juarez Bochi Date: Wed, 10 Jan 2024 10:17:13 -0500 Subject: [PATCH] Fix q4_1 dequantization --- gguflib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gguflib.c b/gguflib.c index 6f7aac2..7aa2696 100644 --- a/gguflib.c +++ b/gguflib.c @@ -834,7 +834,7 @@ void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* For each block get the scale and convert all the * weights in the block. */ float scale = from_half(*((uint16_t*)block)); - float bias = from_half(*((uint16_t*)block+2)); + float bias = from_half(*((uint16_t*)block+1)); /* First 16 weights are in the lower bits */ for (uint32_t j = 0; j < 16; j++) { uint8_t value = block[j+4]; // j+2 to skip the scale and bias bytes.