From bd4ecbda94c5108712a238e276252819b384f4cb Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Dec 2023 15:13:42 +0100 Subject: [PATCH] FP16 added. Split-mixtral improved. --- Makefile | 4 +-- README.md | 14 ++++++++- fp16.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fp16.h | 5 ++++ gguf-tools.c | 43 ++++++++++++++++++++++---- gguflib.h | 1 + 6 files changed, 144 insertions(+), 8 deletions(-) create mode 100644 fp16.c create mode 100644 fp16.h diff --git a/Makefile b/Makefile index 358f488..3a29ffe 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: gguf-tools -gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h - $(CC) gguf-tools.c gguflib.c sds.c \ +gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h + $(CC) gguf-tools.c gguflib.c sds.c fp16.c \ -g -ggdb -Wall -W -pedantic -O2 -o gguf-tools clean: diff --git a/README.md b/README.md index 96248cf..5933ac0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,18 @@ # GGUF tools -Todo... still not clear what this is going to be. +This is a work-in-progress library to manipulate GGUF files. +The program 'gguf-tools' uses the library to implement both useful and +useless stuff, to show the library usage. + + gguf-tools show file.gguf + +shows detailed info about the GGUF file. This will include all the key-value pairs, including arrays, and detailed tensor information. Tensor offsets will be relative to the start *of the file*, not the start of the data section like in the GGUF format (absolute file offsets are more useful and simpler to use). + + gguf-tools split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf + +Extracts a 7B model `out.gguf` from the Mixtral 8x7B MoE model using the specified expert ID for each layer (the sequence 652... has 32 digits, one per layer). + +Note that split-mixtral is quite useless as models obtained in this way will not perform any useful action. This is just an experiment and a non-trivial task to show how to use the library.
Likely it will be removed soon, once I have more interesting and useful examples to show. ## Specification documents diff --git a/fp16.c b/fp16.c new file mode 100644 index 0000000..9a62719 --- /dev/null +++ b/fp16.c @@ -0,0 +1,85 @@ +#include <stdint.h> +#include <math.h> + +/* This code comes originally from: + * https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h + * + * The original code is MIT licensed. */ + +static inline float fp32_from_bits(uint32_t w) { + union { + uint32_t as_bits; + float as_value; + } fp32; + fp32.as_bits = w; + return fp32.as_value; +} + +static inline uint32_t fp32_to_bits(float f) { + union { + float as_value; + uint32_t as_bits; + } fp32; + fp32.as_value = f; + return fp32.as_bits; +} + +float from_half(uint16_t h) { + const uint32_t w = (uint32_t) h << 16; + const uint32_t sign = w & UINT32_C(0x80000000); + const uint32_t two_w = w + w; + + const uint32_t exp_offset = UINT32_C(0xE0) << 23; +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) + const float exp_scale = 0x1.0p-112f; +#else + const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); +#endif + const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; + + const uint32_t magic_mask = UINT32_C(126) << 23; + const float magic_bias = 0.5f; + const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; + + const uint32_t denormalized_cutoff = UINT32_C(1) << 27; + const uint32_t result = sign | + (two_w < denormalized_cutoff ? 
fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); + return fp32_from_bits(result); +} + +uint16_t to_half(float f) { +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) + const float scale_to_inf = 0x1.0p+112f; + const float scale_to_zero = 0x1.0p-110f; +#else + const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); + const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); +#endif + float base = (fabsf(f) * scale_to_inf) * scale_to_zero; + + const uint32_t w = fp32_to_bits(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & UINT32_C(0x80000000); + uint32_t bias = shl1_w & UINT32_C(0xFF000000); + if (bias < UINT32_C(0x71000000)) { + bias = UINT32_C(0x71000000); + } + + base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; + const uint32_t bits = fp32_to_bits(base); + const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); + const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); + const uint32_t nonsign = exp_bits + mantissa_bits; + return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); +} + +#ifdef TEST_MAIN +#include <stdio.h> +int main(void) { + float f = 1.2345; + uint16_t half = to_half(f); + float f2 = from_half(half); + printf("%f %f\n", f, f2); + return 0; +} +#endif diff --git a/fp16.h b/fp16.h new file mode 100644 index 0000000..32543b6 --- /dev/null +++ b/fp16.h @@ -0,0 +1,5 @@ +#ifndef FP16_h +#define FP16_h +float from_half(uint16_t h); +uint16_t to_half(float f); +#endif diff --git a/gguf-tools.c b/gguf-tools.c index aa012f9..0b0521d 100644 --- a/gguf-tools.c +++ b/gguf-tools.c @@ -169,8 +169,9 @@ void gguf_tools_show(const char *filename) { } /* Read a Mixtral MoE model and creates a new non-MoE GGUF file based - * on the weights of the expert with id 'expert_id'.
*/ -void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) { + * on the weights of the experts with IDs in the array of 'experts_id'. + * The array must contain 32 integers, one for each layer. */ +void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) { gguf_ctx *mixtral = gguf_init(mixtral_filename); if (mixtral == NULL) { perror("Opening Mixtral file"); @@ -238,6 +239,20 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const /* The tensor is a feed-forward tensor? We want to copy only * the ones of our expert ID. */ if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) { + /* Extract which block this FFN belongs. */ + int block; + assert(memcmp(tn,"blk.",4) == 0); // Must start with blk. + char *p = strchr(tn+4,'.'); + assert(p != NULL); + *p = 0; + block = atoi(tn+4); + *p = '.'; + assert(block >= 0 && block < 32); + + /* Now that we have the block, we can select the corresponding + * expert ID we want to use for this block. */ + int expert_id = experts_id[block]; + char match[32]; snprintf(match,sizeof(match),".%d.weight",expert_id); char *match_ptr = strstr(tn,match); @@ -283,7 +298,8 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const /* Finally, append the tensors weights. 
*/ for (uint32_t j = 0; j < num_tensors; j++) { - printf("Writing tensor %s\n", tensors[j].dest_name); + printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name, + (int)tensors[j].orig_info.namelen, tensors[j].orig_info.name); if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data, tensors[j].orig_info.bsize) == 0) { @@ -300,7 +316,9 @@ void gguf_tools_usage(const char *progname) { printf("Usage: %s <subcommand> [options...]\n" "Subcommands:\n" " show <filename> -- show GGUF model keys and tensors.\n" - " split-mixtral <id> mixtral.gguf out.gguf -- extract expert.\n" + " split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n" + "Example:\n" + " split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n" , progname); exit(1); } @@ -311,7 +329,22 @@ int main(int argc, char **argv) { if (!strcmp(argv[1],"show") && argc == 3) { gguf_tools_show(argv[2]); } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) { - gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]); + int experts[32]; + size_t elen = strlen(argv[2]); + for (size_t j = 0; j < 32; j++) { + if (j < elen) { + experts[j] = argv[2][j] - '0'; + if (experts[j] < 0 || experts[j] > 7) { + fprintf(stderr,"Invalid expert ID: %d\n", experts[j]); + exit(1); + } + } else { + /* If there aren't 32 digits in the input, use the last + * one repeated up to the last layer. */ + experts[j] = j > 0 ? experts[j-1] : 0; + } + } + gguf_tools_split_mixtral(experts,argv[3],argv[4]); } else { gguf_tools_usage(argv[0]); } diff --git a/gguflib.h b/gguflib.h index cf10367..09f0998 100644 --- a/gguflib.h +++ b/gguflib.h @@ -6,6 +6,7 @@ */ #ifndef GGUFLIB_H +#define GGUFLIB_H #include <stdint.h>