Add BF16 support and fix warnings

This change updates the data type definitions to match the latest source code. The bfloat16 data type is now supported; however, the IQ quantization formats cannot be interpreted yet. Compiler warnings and other nits have been cleaned up, behavioral changes have been avoided, and no new features have been added yet.
2025-12-16 00:18:52 +08:00 · 2024-05-25 22:48:18 -07:00
parent 3e5c0a464d
commit ede59bb742
6 changed files with 201 additions and 27 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 gguf-tools
--- a/4
+++ b/4
@@ -1,8 +1,8 @@
 all: gguf-tools
-gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h
+gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h bf16.h
 	$(CC) gguf-tools.c gguflib.c sds.c fp16.c \
-		-march=native -flto -ffast-math \
+		-march=native -ffast-math \
 		-g -ggdb -Wall -W -pedantic -O3 -o gguf-tools
 clean:
--- a/bf16.h
+++ b/bf16.h
@@ -0,0 +1,78 @@
 #ifndef BF16_h
 #define BF16_h
 #include <stdint.h>
 /**
 * Converts brain16 to float32.
 *
 * The bfloat16 floating point format has the following structure:
 *
 *       ┌sign
 *       │
 *       │   ┌exponent
 *       │   │
 *       │   │      ┌mantissa
 *       │   │      │
 *       │┌──┴───┐┌─┴───┐
 *     0b0000000000000000 brain16
 *
 * Since bf16 has the same number of exponent bits as a 32bit float,
 * encoding and decoding numbers becomes relatively straightforward.
 *
 *       ┌sign
 *       │
 *       │   ┌exponent
 *       │   │
 *       │   │      ┌mantissa
 *       │   │      │
 *       │┌──┴───┐┌─┴───────────────────┐
 *     0b00000000000000000000000000000000 IEEE binary32
 *
 * For comparison, the standard fp16 format has fewer exponent bits.
 *
 *       ┌sign
 *       │
 *       │  ┌exponent
 *       │  │
 *       │  │    ┌mantissa
 *       │  │    │
 *       │┌─┴─┐┌─┴──────┐
 *     0b0000000000000000 IEEE binary16
 *
 * @see IEEE 754-2008
 */
 static inline float from_brain(uint16_t h) {
    /* A bf16 value is the upper half of a binary32 bit pattern, so
     * widening only requires placing it in the high 16 bits and
     * leaving the low 16 bits at zero. */
    union {
        uint32_t bits;
        float value;
    } pun = { .bits = (uint32_t)h << 16 };
    return pun.value;
 }
 /**
 * Converts float32 to brain16.
 *
 * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
 * Subnormals shall be flushed to zero, and NANs will be quiet.
 * This code should vectorize nicely if using modern compilers.
 */
 static inline uint16_t to_brain(float s) {
    union {
        uint32_t bits;
        float value;
    } pun;
    pun.value = s;
    const uint32_t raw = pun.bits;
    const uint32_t upper = raw >> 16;
    /* NaN: exponent all ones with a non-zero mantissa. Keep the top
     * bits and set the highest mantissa bit so the result is quiet. */
    if ((raw & 0x7fffffff) > 0x7f800000)
        return (uint16_t)(upper | 64);
    /* Zero exponent field (zero or subnormal): keep only the sign. */
    if ((raw & 0x7f800000) == 0)
        return (uint16_t)((raw & 0x80000000) >> 16);
    /* Round to nearest, ties to even: the bias is 0x7fff when the
     * lowest kept bit is 0 and 0x8000 when it is 1. */
    const uint32_t bias = 0x7fff + (upper & 1);
    return (uint16_t)((raw + bias) >> 16);
 }
 #endif
--- a/gguf-tools.c
+++ b/gguf-tools.c
@@ -5,6 +5,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <math.h>
 #include <inttypes.h>
 #include "gguflib.h"
 #include "sds.h"
@@ -143,7 +144,7 @@ int strmatch(const char *pattern, int patternLen,
 void gguf_tools_show(const char *filename) {
    gguf_ctx *ctx = gguf_open(filename);
    if (ctx == NULL) {
-        perror("Opening GGUF file");
+        perror(filename);
        exit(1);
    }
@@ -166,16 +167,16 @@ void gguf_tools_show(const char *filename) {
    gguf_tensor tensor;
    uint64_t params = 0;
    while (gguf_get_tensor(ctx,&tensor)) {
-        printf("%s tensor %.*s @%llu, %llu weights, dims ",
+        printf("%s tensor %.*s @%" PRIu64 ", %" PRIu64 " weights, dims ",
            gguf_get_tensor_type_name(tensor.type),
            (int)tensor.namelen,
            tensor.name,
            tensor.offset,
            tensor.num_weights);
        for (uint32_t j = 0; j < tensor.ndim; j++) {
-            printf("%s%llu",(j == 0) ? "[" : ",", tensor.dim[j]);
+            printf("%s%" PRIu64 "",(j == 0) ? "[" : ",", tensor.dim[j]);
        }
-        printf("], %llu bytes\n", tensor.bsize);
+        printf("], %" PRIu64 " bytes\n", tensor.bsize);
        params += tensor.num_weights;
    }
@@ -192,13 +193,13 @@ void gguf_tools_show(const char *filename) {
 void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
    gguf_ctx *mixtral = gguf_open(mixtral_filename);
    if (mixtral == NULL) {
-        perror("Opening Mixtral file");
+        perror(mixtral_filename);
        exit(1);
    }
    gguf_ctx *output = gguf_create(output_filename, GGUF_NONE);
    if (output == NULL) {
-        perror("Opening the output file");
+        perror(output_filename);
        exit(1);
    }
@@ -312,7 +313,7 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con
        }
        tensor_off += tensors[j].orig_info.bsize;
    }
-    printf("Output file: after writing tensors info, file size is: %llu\n", output->size);
+    printf("Output file: after writing tensors info, file size is: %" PRIu64 "\n", output->size);
    /* Finally, append the tensors weights. */
    for (uint32_t j = 0; j < num_tensors; j++) {
@@ -333,7 +334,7 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con
 void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
    gguf_ctx *ctx = gguf_open(filename);
    if (ctx == NULL) {
-        perror("Opening GGUF file");
+        perror(filename);
        exit(1);
    }
@@ -424,8 +425,8 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
    float *weights1 = gguf_tensor_to_float(t1);
    float *weights2 = gguf_tensor_to_float(t2);
    if (weights1 == NULL || weights2 == NULL) {
-        if (weights1) free(weights1);
+        free(weights1);
-        if (weights2) free(weights2);
+        free(weights2);
        return 0;
    }
@@ -444,7 +445,7 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
    double avg_diff = tot_diff / t1->num_weights;
    /* Multiply by 75 to normalize the difference of a
-     * random varialbe between -N and +N to 0 - 100% */
+     * random variable between -N and +N to 0 - 100% */
    *diff = avg_diff / avg_mag * 75;
    free(weights1);
@@ -454,9 +455,14 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
 void gguf_tools_compare(const char *file1, const char *file2) {
    gguf_ctx *ctx1 = gguf_open(file1);
    if (ctx1 == NULL) {
        perror(file1);
        exit(1);
    }
    gguf_ctx *ctx2 = gguf_open(file2);
-    if (ctx1 == NULL || ctx2 == NULL) {
+    if (ctx2 == NULL) {
-        perror("Opening GGUF files");
+        perror(file2);
        exit(1);
    }
--- a/gguflib.c
+++ b/gguflib.c
@@ -8,9 +8,11 @@
 #include <unistd.h>
 #include <string.h>
 #include <assert.h>
 #include <inttypes.h>
 #include "gguflib.h"
 #include "fp16.h"
 #include "bf16.h"
 /* ============================ Low level functions ========================= */
@@ -43,9 +45,21 @@ struct gguf_tensor_type_features {
    {"q5_k", 256, 176},
    {"q6_k", 256, 210},
    {"q8_k", 256, 292},
    {"iq2_xxs", 256, 66},
    {"iq2_xs", 256, 74},
    {"iq3_xxs", 256, 98},
    {"iq1_s", 256, 110},
    {"iq4_nl", 256, 50},
    {"iq3_s", 256, 110},
    {"iq2_s", 256, 82},
    {"iq4_xs", 256, 136},
    {"i8", 1, 1},
    {"i16", 1, 2},
    {"i32", 1, 4},
    {"i64", 1, 8},
    {"f64", 1, 8},
    {"iq1_m", 256, 56},
    {"bf16", 1, 2},
 };
 /* Return the value type name given the type ID. */
@@ -101,8 +115,8 @@ gguf_ctx *gguf_open(const char *filename) {
    if (fd == -1) return NULL;
    /* Mapping successful. We can create our context object. */
-    gguf_ctx *ctx = malloc(sizeof(*ctx));
+    gguf_ctx *ctx = calloc(1, sizeof(*ctx));
-    memset(ctx,0,sizeof(*ctx));
+    if (!ctx) return NULL;
    ctx->fd = fd;
    ctx->alignment = 32; // Default alignment of GGUF files.
    ctx->data_off = 0;   // Set later.
@@ -363,8 +377,8 @@ void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *
    struct gguf_print_options *po = privdata;
    if (po && po->max_array_items && in_array > po->max_array_items) {
        if (in_array-1 == po->max_array_items)
-            printf("... %llu more items of %llu", array_len-in_array+1,
+            printf("... %" PRIu64 " more items of %" PRIu64 "",
-                                                  array_len);
+                   array_len-in_array+1, array_len);
        return;
    }
@@ -396,9 +410,9 @@ void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *
        case GGUF_VALUE_TYPE_STRING:
            printf("%.*s", (int)val->string.len, val->string.string); break;
        case GGUF_VALUE_TYPE_UINT64:
-            printf("%llu", val->uint64); break;
+            printf("%" PRIu64 "", val->uint64); break;
        case GGUF_VALUE_TYPE_INT64:
-            printf("%lld", val->int64); break;
+            printf("%" PRId64 "", val->int64); break;
        case GGUF_VALUE_TYPE_FLOAT64:
            printf("%lf", val->float64); break;
        default:
@@ -516,6 +530,12 @@ void gguf_store_f16_callback(void *dst, uint64_t idx, float f) {
    f16[idx] = to_half(f);
 }
 /* Store callback for dequantizers that must emit BF16: 'dst' is treated
 * as an array of uint16_t and the float 'f' is written at position 'idx'
 * after being converted with to_brain(). */
 void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) {
    uint16_t *out = (uint16_t *)dst;
    *(out + idx) = to_brain(f);
 }
 /* Q8_0 blocks dequantization to floats.
 * 'dst' is supposed to have enough space for 'count' weights. */
 void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
@@ -755,7 +775,7 @@ void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo
        float scale_of_scales = from_half(*((uint16_t*)(block+16+64)));
        float scale_of_mins = from_half(*((uint16_t*)(block+16+64+2)));
-        float scale, min;
+        float scale = 0, min = 0;
        int bn = 0; // Block number
        for (uint64_t cluster = 0; cluster < 2; cluster++) {
            for (uint64_t j = 0; j < 128; j++) {
@@ -863,7 +883,8 @@ void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_flo
 /* FP16 blocks dequantization to floats.
 * 'y' is supposed to have enough space for 'count' weights. */
-void gguf_f16_to_float(void *weights_data, float *dst, uint64_t count, store_float_callback store_callback) {
+static void gguf_f16_to_float(void *weights_data, void *dst, uint64_t count,
                              store_float_callback store_callback) {
    float *f = dst;
    uint64_t i = 0; // i-th weight to dequantize.
    uint16_t *w16 = weights_data;
@@ -877,6 +898,23 @@ void gguf_f16_to_float(void *weights_data, float *dst, uint64_t count, store_flo
    }
 }
 /* Dequantize 'count' BF16 weights read from 'weights_data'.
 * When 'store_callback' is given, each converted weight is handed to it
 * together with 'dst' and the weight index; otherwise 'dst' is written
 * directly as an array of floats, so it must have room for 'count'
 * weights. */
 static void gguf_bf16_to_float(void *weights_data, void *dst, uint64_t count,
                                store_float_callback store_callback) {
    const uint16_t *src = weights_data;
    float *out = dst;
    for (uint64_t idx = 0; idx < count; idx++) {
        float value = from_brain(src[idx]);
        if (!store_callback) {
            out[idx] = value;
            continue;
        }
        store_callback(dst,idx,value);
    }
 }
 /* Convert the specified tensor (quantized or not) into an array of
 * floats. The array is allocated with malloc(). If the tensor is already
 * in FP32 floats format, it is just memcpy()-ed to the destination array.
@@ -885,10 +923,13 @@ void gguf_f16_to_float(void *weights_data, float *dst, uint64_t count, store_flo
 * NULL is returned as well, but errno is set to EINVAL. */
 float *gguf_tensor_to_float(gguf_tensor *tensor) {
    float *f = malloc(tensor->num_weights*sizeof(float));
    if (!f) return NULL;
    if (tensor->type == GGUF_TYPE_F32) {
        memcpy(f, tensor->weights_data, tensor->num_weights*sizeof(float));
    } else if (tensor->type == GGUF_TYPE_F16) {
        gguf_f16_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
    } else if (tensor->type == GGUF_TYPE_BF16) {
        gguf_bf16_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
    } else if (tensor->type == GGUF_TYPE_Q8_0) {
        gguf_q8_0_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
    } else if (tensor->type == GGUF_TYPE_Q4_K) {
@@ -913,12 +954,15 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
 * an array of int16_t values. */
 int16_t *gguf_tensor_to_f16(gguf_tensor *tensor) {
    int16_t *f16 = malloc(tensor->num_weights*sizeof(int16_t));
    if (!f16) return NULL;
    if (tensor->type == GGUF_TYPE_F32) {
        float *f = (float*)tensor->weights_data;
        for (uint64_t j = 0; j < tensor->num_weights; j++)
            f16[j] = to_half(f[j]);
    } else if (tensor->type == GGUF_TYPE_F16) {
        memcpy(f16, tensor->weights_data, tensor->num_weights*sizeof(int16_t));
    } else if (tensor->type == GGUF_TYPE_BF16) {
        gguf_bf16_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
    } else if (tensor->type == GGUF_TYPE_Q8_0) {
        gguf_q8_0_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
    } else if (tensor->type == GGUF_TYPE_Q4_K) {
@@ -938,3 +982,36 @@ int16_t *gguf_tensor_to_f16(gguf_tensor *tensor) {
    }
    return f16;
 }
 /* Same as gguf_tensor_to_float() but the result will be a bf16 tensor,
 * that is an array of int16_t values holding bfloat16 bit patterns.
 *
 * The array is allocated with malloc() and must be released by the
 * caller with free(). NULL is returned if the allocation fails. If the
 * tensor type is not handled here, NULL is returned as well, but errno
 * is set to EINVAL. */
 int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor) {
    int16_t *bf16 = malloc(tensor->num_weights*sizeof(int16_t));
    if (!bf16) return NULL;
    if (tensor->type == GGUF_TYPE_F32) {
        float *f = (float*)tensor->weights_data;
        /* Must use to_brain() here: to_half() would store IEEE binary16
         * bit patterns in a buffer every other branch fills with bf16. */
        for (uint64_t j = 0; j < tensor->num_weights; j++)
            bf16[j] = to_brain(f[j]);
    } else if (tensor->type == GGUF_TYPE_F16) {
        gguf_f16_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_BF16) {
        /* Already in the target format: plain copy. */
        memcpy(bf16, tensor->weights_data, tensor->num_weights*sizeof(int16_t));
    } else if (tensor->type == GGUF_TYPE_Q8_0) {
        gguf_q8_0_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_Q4_K) {
        gguf_q4_k_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_Q6_K) {
        gguf_q6_k_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_Q2_K) {
        gguf_q2_k_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_Q4_0) {
        gguf_q4_0_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else if (tensor->type == GGUF_TYPE_Q4_1) {
        gguf_q4_1_to_float(tensor->weights_data, bf16, tensor->num_weights, gguf_store_bf16_callback);
    } else {
        free(bf16);
        errno = EINVAL;
        return NULL;
    }
    return bf16;
 }
--- a/gguflib.h
+++ b/gguflib.h
@@ -27,16 +27,27 @@ enum gguf_tensor_type {
    GGUF_TYPE_Q5_1 = 7,
    GGUF_TYPE_Q8_0 = 8,
    GGUF_TYPE_Q8_1 = 9,
    // k-quantizations
    GGUF_TYPE_Q2_K = 10,
    GGUF_TYPE_Q3_K = 11,
    GGUF_TYPE_Q4_K = 12,
    GGUF_TYPE_Q5_K = 13,
    GGUF_TYPE_Q6_K = 14,
    GGUF_TYPE_Q8_K = 15,
-    GGUF_TYPE_I8,
+    GGUF_TYPE_IQ2_XXS = 16,
-    GGUF_TYPE_I16,
+    GGUF_TYPE_IQ2_XS = 17,
-    GGUF_TYPE_I32,
+    GGUF_TYPE_IQ3_XXS = 18,
    GGUF_TYPE_IQ1_S = 19,
    GGUF_TYPE_IQ4_NL = 20,
    GGUF_TYPE_IQ3_S = 21,
    GGUF_TYPE_IQ2_S = 22,
    GGUF_TYPE_IQ4_XS = 23,
    GGUF_TYPE_I8 = 24,
    GGUF_TYPE_I16 = 25,
    GGUF_TYPE_I32 = 26,
    GGUF_TYPE_I64 = 27,
    GGUF_TYPE_F64 = 28,
    GGUF_TYPE_IQ1_M = 29,
    GGUF_TYPE_BF16 = 30,
    GGUF_TYPE_COUNT,
 };
@@ -185,5 +196,6 @@ uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
 void gguf_skip_key_values_section(gguf_ctx *ctx);
 float *gguf_tensor_to_float(gguf_tensor *tensor);
 int16_t *gguf_tensor_to_f16(gguf_tensor *tensor);
 int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor);
 #endif