From a4858afb4df2268b2b47afed23397a9afeb331ff Mon Sep 17 00:00:00 2001
From: antirez
Date: Sat, 30 Dec 2023 17:23:27 +0100
Subject: [PATCH] Implement f16/f32 in gguf_tensor_to_float().

---
 gguflib.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gguflib.c b/gguflib.c
index ade0b6c..5e71ecf 100644
--- a/gguflib.c
+++ b/gguflib.c
@@ -511,7 +511,16 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
         gguf_get_tensor_type_features(tensor->type);
     uint64_t block_size = tf->bytes_per_block;
     float *f = malloc(tensor->num_weights*sizeof(float));
-    if (tensor->type == GGUF_TYPE_Q8_0) {
+    if (tensor->type == GGUF_TYPE_F32) {
+        memcpy(f, tensor->weights_data, tensor->num_weights*sizeof(float));
+    } else if (tensor->type == GGUF_TYPE_F16) {
+        uint64_t i = 0; // i-th weight to dequantize.
+        uint16_t *w16 = (uint16_t*) tensor->weights_data;
+        while(i < tensor->num_weights) {
+            f[i] = from_half(w16[i]);
+            i++;
+        }
+    } else if (tensor->type == GGUF_TYPE_Q8_0) {
         /* Very simple layout: |16 bit delta|32 x 8bit weights|
          * Each weight is delta * quantized_weight[0..31] */
         int8_t *block = (int8_t*)tensor->weights_data;
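
Note: the F16 branch relies on a from_half() helper whose body is not part of
this hunk. For reference, below is a minimal sketch of what such an IEEE 754
binary16-to-binary32 decoder could look like in portable C with no hardware
F16C support; it is an assumption for illustration, not the actual gguflib.c
implementation.

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for gguflib's from_half(): decode one IEEE 754
 * half-precision value (stored as uint16_t) into a 32-bit float. */
static float from_half(uint16_t h) {
    uint32_t sign = (uint32_t)(h >> 15) << 31;
    uint32_t exp  = (h >> 10) & 0x1F;   /* 5-bit biased exponent. */
    uint32_t mant = h & 0x3FF;          /* 10-bit mantissa. */
    uint32_t bits;

    if (exp == 0) {
        if (mant == 0) {
            bits = sign;                /* Positive or negative zero. */
        } else {
            /* Subnormal half: shift the mantissa left until its implicit
             * leading 1 appears, adjusting the float exponent to match. */
            int e = -1;
            do { mant <<= 1; e++; } while ((mant & 0x400) == 0);
            mant &= 0x3FF;
            bits = sign | ((uint32_t)(127 - 15 - e) << 23) | (mant << 13);
        }
    } else if (exp == 0x1F) {
        bits = sign | 0x7F800000 | (mant << 13); /* Inf or NaN. */
    } else {
        /* Normalized: rebias the exponent from 15 to 127 and widen the
         * mantissa from 10 to 23 bits. */
        bits = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
    }

    float f;
    memcpy(&f, &bits, sizeof(f));       /* Bit-cast without aliasing UB. */
    return f;
}

Every binary16 value is exactly representable in binary32, so a decoder along
these lines is lossless, which is why the patch can dequantize F16 tensors
with a single per-weight conversion and no block/delta bookkeeping.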