Implement f16/f32 in gguf_tensor_to_float().

antirez
2023-12-30 17:23:27 +01:00
parent 136e04977c
commit a4858afb4d

@@ -511,7 +511,16 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
         gguf_get_tensor_type_features(tensor->type);
     uint64_t block_size = tf->bytes_per_block;
     float *f = malloc(tensor->num_weights*sizeof(float));
-    if (tensor->type == GGUF_TYPE_Q8_0) {
+    if (tensor->type == GGUF_TYPE_F32) {
+        memcpy(f, tensor->weights_data, tensor->num_weights*sizeof(float));
+    } else if (tensor->type == GGUF_TYPE_F16) {
+        uint64_t i = 0; // i-th weight to dequantize.
+        uint16_t *w16 = (uint16_t*) tensor->weights_data;
+        while(i < tensor->num_weights) {
+            f[i] = from_half(w16[i]);
+            i++;
+        }
+    } else if (tensor->type == GGUF_TYPE_Q8_0) {
         /* Very simple layout: |16 bit delta|32 x 8bit weights|
          * Each weight is delta * quantized_weight[0..31] */
         int8_t *block = (int8_t*)tensor->weights_data;
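
The f16 branch relies on from_half(), whose definition lies outside this hunk. Below is a minimal sketch of such an IEEE 754 binary16-to-binary32 conversion, assuming the standard bit layout (1 sign, 5 exponent, 10 mantissa bits); the name f16_to_f32 and its internals are illustrative, not the repository's actual helper.

#include <stdint.h>
#include <string.h>

/* Sketch: decode an IEEE 754 half-precision value into a float by
 * rebuilding the corresponding binary32 bit pattern. */
static float f16_to_f32(uint16_t h) {
    uint32_t sign = (uint32_t)(h >> 15) << 31;
    uint32_t exp  = (h >> 10) & 0x1F;   /* 5-bit biased exponent */
    uint32_t frac = h & 0x3FF;          /* 10-bit mantissa */
    uint32_t bits;

    if (exp == 0x1F) {                  /* Inf or NaN: force max exponent */
        bits = sign | 0x7F800000 | (frac << 13);
    } else if (exp != 0) {              /* normal: rebias exponent 15 -> 127 */
        bits = sign | ((exp + 112) << 23) | (frac << 13);
    } else if (frac == 0) {             /* signed zero */
        bits = sign;
    } else {                            /* subnormal: renormalize mantissa */
        int shift = 0;
        while ((frac & 0x400) == 0) { frac <<= 1; shift++; }
        frac &= 0x3FF;                  /* drop the implicit leading 1 */
        bits = sign | ((uint32_t)(113 - shift) << 23) | (frac << 13);
    }
    float out;
    memcpy(&out, &bits, sizeof(out));   /* bit copy avoids aliasing UB */
    return out;
}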
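
Since the Q8_0 branch is only partially visible here, a hedged sketch of what the layout comment describes, dequantizing a single block: a 16-bit delta followed by 32 int8 weights, each weight being delta * quantized_weight[j]. It assumes the delta is an f16 decoded with the same from_half() helper the f16 branch uses; dequantize_q8_0_block is an illustrative name, not a function from the repository.

#include <stdint.h>
#include <string.h>

float from_half(uint16_t h);  /* f16 -> f32, as used in the diff above */

/* Sketch: expand one |16 bit delta|32 x 8bit weights| block into
 * 32 floats, out[j] = delta * quantized_weight[j]. */
static void dequantize_q8_0_block(const uint8_t *block, float *out) {
    uint16_t half;
    memcpy(&half, block, sizeof(half)); /* unaligned-safe 16-bit load */
    float delta = from_half(half);      /* assumed f16 scale factor */
    const int8_t *q = (const int8_t*)(block + sizeof(half));
    for (int j = 0; j < 32; j++)
        out[j] = delta * (float)q[j];
}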