FP16 added. Split-mixtral improved.

2025-12-16 00:18:52 +08:00 · 2023-12-27 15:13:42 +01:00
parent a77a4d061c
commit bd4ecbda94
6 changed files with 144 additions and 8 deletions
--- a/4
+++ b/4
@@ -1,7 +1,7 @@
 all: gguf-tools
-gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h
+# fp16.c is compiled by the recipe below, so it is listed as a prerequisite
+# too: editing it must trigger a rebuild.
+gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.c fp16.h
-	$(CC) gguf-tools.c gguflib.c sds.c \
+	$(CC) gguf-tools.c gguflib.c sds.c fp16.c \
 		-g -ggdb -Wall -W -pedantic -O2 -o gguf-tools
 clean:
--- a/README.md
+++ b/README.md
@@ -1,6 +1,18 @@
 # GGUF tools
-Todo... still not clear what this is going to be.
+This is a work in progress library to manipulate GGUF files.
 The program 'gguf-tools' uses the library to implement both useful and
 useless stuff, to show the library usage.
    gguf-tools show file.gguf
 shows detailed info about the GGUF file. This will include all the key-value pairs, including arrays, and detailed tensor information. Tensor offsets will be relative to the start *of the file*, not the start of the data section like in the GGUF format (absolute file offsets are more useful and simpler to use).
    gguf-tools split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf
 Extracts a 7B model `out.gguf` from Mixtral 7B MoE using the specified MoE ID for each layer (there are 32 digits in the sequence 652...).
 Note that split-mixtral is quite useless, as models obtained in this way will not perform any useful action. This is just an experiment and a non-trivial task to show how to use the library. It will likely be removed soon, once I have more interesting and useful examples to show.
 ## Specification documents
--- a/fp16.c
+++ b/fp16.c
@@ -0,0 +1,85 @@
 #include <stdint.h>
 #include <math.h>
 /* This code comes originally from:
 * https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h
 *
 * The original code is MIT licensed. */
 /* Return the float whose object representation is the 32 bit word 'w'.
 * The bits are reinterpreted through a union: no numeric conversion
 * takes place. */
 static inline float fp32_from_bits(uint32_t w) {
    const union {
        uint32_t bits;
        float value;
    } pun = { .bits = w };
    return pun.value;
 }
 /* Return the 32 bit word holding the object representation of 'f'.
 * This is the inverse of fp32_from_bits(): the bits are reinterpreted
 * through a union, not converted numerically. */
 static inline uint32_t fp32_to_bits(float f) {
    const union {
        float value;
        uint32_t bits;
    } pun = { .value = f };
    return pun.bits;
 }
 /* Convert the 16 bit half precision pattern 'h' into a float.
 *
 * Two candidate results are always computed, one valid when the half
 * exponent field is non-zero and one valid when it is zero; the right
 * one is then selected and the sign bit is merged back in. */
 float from_half(uint16_t h) {
    /* Move the half into the upper 16 bits, so that its sign bit sits
     * in bit 31 like the sign bit of a float. */
    const uint32_t shifted = (uint32_t) h << 16;
    const uint32_t sign_bit = shifted & UINT32_C(0x80000000);
    /* Doubling pushes the sign bit out of the word: exponent and
     * mantissa are left in the top 15 bits. */
    const uint32_t magnitude = shifted + shifted;
    /* Candidate for a non-zero exponent field: align the fields with
     * the float layout, add an exponent offset, then scale the value. */
    const uint32_t exp_adjust = UINT32_C(0xE0) << 23;
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float rescale = 0x1.0p-112f;
 #else
    const float rescale = fp32_from_bits(UINT32_C(0x7800000));
 #endif
    const float as_normal = fp32_from_bits((magnitude >> 4) + exp_adjust) * rescale;
    /* Candidate for a zero exponent field: OR the mantissa into the
     * bit pattern of 0.5f (126 << 23), then subtract 0.5f. */
    const uint32_t half_pattern = UINT32_C(126) << 23;
    const float as_subnormal = fp32_from_bits((magnitude >> 17) | half_pattern) - 0.5f;
    /* After the doubling the 5 exponent bits occupy bits 27..31, so a
     * magnitude below 1 << 27 means the exponent field is zero. */
    uint32_t chosen;
    if (magnitude < (UINT32_C(1) << 27)) {
        chosen = fp32_to_bits(as_subnormal);
    } else {
        chosen = fp32_to_bits(as_normal);
    }
    return fp32_from_bits(sign_bit | chosen);
 }
 /* Convert the float 'f' into a 16 bit half precision pattern.
 *
 * The magnitude is scaled up and then down again, added to a bias value
 * derived from the input exponent, and the half exponent and mantissa
 * fields are then extracted from the bits of that sum. An input whose
 * exponent bits are all set and whose mantissa is non-zero produces the
 * pattern 0x7E00 (plus the sign bit). */
 uint16_t to_half(float f) {
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float to_inf = 0x1.0p+112f;
    const float to_zero = 0x1.0p-110f;
 #else
    const float to_inf = fp32_from_bits(UINT32_C(0x77800000));
    const float to_zero = fp32_from_bits(UINT32_C(0x08800000));
 #endif
    float scaled = (fabsf(f) * to_inf) * to_zero;
    const uint32_t raw = fp32_to_bits(f);
    /* Doubling drops the sign bit, leaving the 8 exponent bits on top. */
    const uint32_t raw_x2 = raw + raw;
    const uint32_t sign_bit = raw & UINT32_C(0x80000000);
    /* The bias is the exponent field of the input, clamped from below. */
    const uint32_t exp_field = raw_x2 & UINT32_C(0xFF000000);
    const uint32_t bias =
        exp_field < UINT32_C(0x71000000) ? UINT32_C(0x71000000) : exp_field;
    scaled = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + scaled;
    /* Pick the half exponent and mantissa fields out of the sum. */
    const uint32_t sum_bits = fp32_to_bits(scaled);
    const uint32_t half_exp = (sum_bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t half_mant = sum_bits & UINT32_C(0x00000FFF);
    uint32_t magnitude;
    if (raw_x2 > UINT32_C(0xFF000000)) {
        /* Exponent all ones and non-zero mantissa. */
        magnitude = UINT16_C(0x7E00);
    } else {
        magnitude = half_exp + half_mant;
    }
    return (sign_bit >> 16) | magnitude;
 }
 #ifdef TEST_MAIN
 #include <stdio.h>
 /* Smoke test, compiled only when TEST_MAIN is defined: convert a float
 * to half precision and back, then print the original value followed by
 * the round-tripped one. */
 int main(void) {
    float original = 1.2345;
    float roundtrip = from_half(to_half(original));
    printf("%f %f\n", original, roundtrip);
    return 0;
 }
 #endif
--- a/fp16.h
+++ b/fp16.h
@@ -0,0 +1,5 @@
 /* Half precision (FP16) <-> float conversion helpers, defined in fp16.c.
 *
 * The include guard must test for the macro NOT being defined (#ifndef):
 * with #ifdef the body below would be skipped on every inclusion and the
 * prototypes would never be declared. */
 #ifndef FP16_h
 #define FP16_h
 #include <stdint.h> /* uint16_t, so the header is self-contained. */
 /* Convert the 16 bit half precision pattern 'h' into a float. */
 float from_half(uint16_t h);
 /* Convert the float 'f' into a 16 bit half precision pattern. */
 uint16_t to_half(float f);
 #endif
--- a/gguf-tools.c
+++ b/gguf-tools.c
@@ -169,8 +169,9 @@ void gguf_tools_show(const char *filename) {
 }
 /* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
- * on the weights of the expert with id 'expert_id'. */
+ * on the weights of the experts with IDs in the array of 'experts_id'.
-void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) {
+ * The array must contain 32 integers, one for each layer. */
 void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
    gguf_ctx *mixtral = gguf_init(mixtral_filename);
    if (mixtral == NULL) {
        perror("Opening Mixtral file");
@@ -238,6 +239,20 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
        /* The tensor is a feed-forward tensor? We want to copy only
         * the ones of our expert ID. */
        if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
            /* Extract which block this FFN belongs. */
            int block;
            assert(memcmp(tn,"blk.",4) == 0); // Must start with blk.<block>
            char *p = strchr(tn+4,'.');
            assert(p != NULL);
            *p = 0;
            block = atoi(tn+4);
            *p = '.';
            assert(block >= 0 && block < 32);
            /* Now that we have the block, we can select the corresponding
             * expert ID we want to use for this block. */
            int expert_id = experts_id[block];
            char match[32];
            snprintf(match,sizeof(match),".%d.weight",expert_id);
            char *match_ptr = strstr(tn,match);
@@ -283,7 +298,8 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
    /* Finally, append the tensors weights. */
    for (uint32_t j = 0; j < num_tensors; j++) {
-        printf("Writing tensor %s\n", tensors[j].dest_name);
+        printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name,
            (int)tensors[j].orig_info.namelen, tensors[j].orig_info.name);
        if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
            tensors[j].orig_info.bsize) == 0)
        {
@@ -300,7 +316,9 @@ void gguf_tools_usage(const char *progname) {
    printf("Usage: %s <subcommand> [options...]\n"
           "Subcommands:\n"
           "  show <filename> -- show GGUF model keys and tensors.\n"
-           "  split-mixtral <id> mixtral.gguf out.gguf -- extract expert.\n"
+           "  split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
           "Example:\n"
           "  split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n"
           , progname);
    exit(1);
 }
@@ -311,7 +329,22 @@ int main(int argc, char **argv) {
    if (!strcmp(argv[1],"show") && argc == 3) {
        gguf_tools_show(argv[2]);
    } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
-        gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]);
+        int experts[32];
        size_t elen = strlen(argv[2]);
        for (size_t j = 0; j < 32; j++) {
            if (j < elen) {
                experts[j] = argv[2][j] - '0';
                if (experts[j] < 0 || experts[j] > 7) {
                    fprintf(stderr,"Invalid expert ID: %d\n", experts[j]);
                    exit(1);
                }
            } else {
                /* If there aren't 32 digits in the input, use the last
                 * one repeated up to the last layer. */
                experts[j] = j > 1 ? experts[j-1] : 0;
            }
        }
        gguf_tools_split_mixtral(experts,argv[3],argv[4]);
    } else {
        gguf_tools_usage(argv[0]);
    }
--- a/gguflib.h
+++ b/gguflib.h
@@ -6,6 +6,7 @@
 */
 #ifndef GGUFLIB_H
 #define GGUFLIB_H
 #include <stdint.h>