FP16 added. Split-mixtral improved.

This commit is contained in:
antirez
2023-12-27 15:13:42 +01:00
parent a77a4d061c
commit bd4ecbda94
6 changed files with 144 additions and 8 deletions

View File

@@ -1,7 +1,7 @@
all: gguf-tools all: gguf-tools
gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h
$(CC) gguf-tools.c gguflib.c sds.c \ $(CC) gguf-tools.c gguflib.c sds.c fp16.c \
-g -ggdb -Wall -W -pedantic -O2 -o gguf-tools -g -ggdb -Wall -W -pedantic -O2 -o gguf-tools
clean: clean:

View File

@@ -1,6 +1,18 @@
# GGUF tools # GGUF tools
Todo... still not clear what this is going to be. This is a work in progress library to manipulate GGUF files.
The program 'gguf-tools' uses the library to implement both useful and
useless stuff, to show the library usage.
gguf-tools show file.gguf
shows detailed info about the GGUF file. This includes all the key-value pairs (arrays too) and detailed tensor information. Tensor offsets are relative to the start *of the file*, not to the start of the data section as in the GGUF format (absolute file offsets are more useful and simpler to use).
gguf-tools split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf
Extracts a 7B model `out.gguf` from Mixtral 7B MoE using the specified MoE ID for each layer (there are 32 digits in the sequence 652...).
Note that split-mixtral is quite useless, as models obtained in this way will not perform any useful action. This is just an experiment and a non-trivial task to show how to use the library. It will likely be removed soon, once I have more interesting and useful examples to show.
## Specification documents ## Specification documents

85
fp16.c Normal file
View File

@@ -0,0 +1,85 @@
#include <stdint.h>
#include <math.h>
/* This code comes originally from:
* https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h
*
* The original code is MIT licensed. */
/* Reinterpret the 32-bit pattern 'w' as a single-precision float.
 * The value goes through a union, so the bits are carried over
 * unchanged: no numeric conversion takes place. */
static inline float fp32_from_bits(uint32_t w) {
    union {
        uint32_t raw;
        float value;
    } pun = { .raw = w };
    return pun.value;
}
/* Return the raw 32-bit pattern that stores the float 'f'.
 * This is the inverse of fp32_from_bits(): the bits are read back
 * through a union, without any numeric conversion. */
static inline uint32_t fp32_to_bits(float f) {
    union {
        float value;
        uint32_t raw;
    } pun = { .value = f };
    return pun.raw;
}
/* Convert the half-precision value whose raw 16 bits are 'h' into a
 * single-precision float.
 *
 * Two candidate results are always computed, one valid when the half
 * exponent field is non-zero and one valid when it is zero; the right
 * one is then selected and the sign bit is put back on top. */
float from_half(uint16_t h) {
    /* Place the half in the top 16 bits, so that its sign bit lines up
     * with the float sign bit. Doubling then shifts the sign out, leaving
     * exponent and mantissa starting at bit 31. */
    const uint32_t shifted = (uint32_t) h << 16;
    const uint32_t sign_bit = shifted & UINT32_C(0x80000000);
    const uint32_t magnitude = shifted + shifted;

    /* Non-zero exponent: drop the 5 exponent bits and 10 mantissa bits
     * into the float exponent/mantissa positions, add 0xE0 to the exponent
     * field and scale the resulting float by 2^-112. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float rescale = 0x1.0p-112f;
#else
    const float rescale = fp32_from_bits(UINT32_C(0x7800000));
#endif
    const uint32_t exponent_boost = UINT32_C(0xE0) << 23;
    const float as_normal =
        fp32_from_bits((magnitude >> 4) + exponent_boost) * rescale;

    /* Zero exponent: OR the mantissa into the low bits of the float 0.5
     * (exponent field 126), then subtract 0.5 to leave only the
     * contribution of the mantissa. */
    const uint32_t half_point_bits = UINT32_C(126) << 23;
    const float as_subnormal =
        fp32_from_bits((magnitude >> 17) | half_point_bits) - 0.5f;

    /* Below 1 << 27 the five exponent bits (31..27 of 'magnitude') are
     * all zero, so the second candidate is the one to use. */
    uint32_t body_bits;
    if (magnitude < (UINT32_C(1) << 27)) {
        body_bits = fp32_to_bits(as_subnormal);
    } else {
        body_bits = fp32_to_bits(as_normal);
    }
    return fp32_from_bits(sign_bit | body_bits);
}
/* Convert the float 'f' to half precision and return the raw 16 bits.
 *
 * The magnitude of 'f' is first scaled up by 2^112 and then down by
 * 2^-110, and a bias value derived from the exponent of 'f' is added to
 * it; the half exponent and mantissa are then read out of the bit
 * pattern of that sum. NaN inputs are mapped to the pattern 0x7E00 and
 * the sign of 'f' is copied to bit 15 of the result.
 * NOTE(review): the scale-and-add sequence presumably makes the FPU do the
 * rounding to half precision, as in the FP16 library this code is
 * derived from -- confirm against that library's documentation. */
uint16_t to_half(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float up_scale = 0x1.0p+112f;
    const float down_scale = 0x1.0p-110f;
#else
    const float up_scale = fp32_from_bits(UINT32_C(0x77800000));
    const float down_scale = fp32_from_bits(UINT32_C(0x08800000));
#endif
    float scaled = (fabsf(f) * up_scale) * down_scale;

    /* Doubling the raw bits shifts the sign out, so that the exponent
     * field occupies the top byte. */
    const uint32_t raw = fp32_to_bits(f);
    const uint32_t doubled = raw + raw;
    const uint32_t sign_bit = raw & UINT32_C(0x80000000);

    /* Exponent-only pattern of the input, clamped from below. */
    uint32_t exp_bias = doubled & UINT32_C(0xFF000000);
    if (exp_bias < UINT32_C(0x71000000)) {
        exp_bias = UINT32_C(0x71000000);
    }

    scaled = fp32_from_bits((exp_bias >> 1) + UINT32_C(0x07800000)) + scaled;

    /* Pick the half exponent (5 bits, moved to bits 14..10) and the low
     * 12 bits of the sum; adding them lets a carry out of the mantissa
     * bump the exponent. */
    const uint32_t sum_bits = fp32_to_bits(scaled);
    const uint32_t half_exp = (sum_bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t half_mant = sum_bits & UINT32_C(0x00000FFF);
    uint32_t magnitude = half_exp + half_mant;

    /* Exponent all ones with a non-zero mantissa: the input is a NaN. */
    if (doubled > UINT32_C(0xFF000000)) {
        magnitude = UINT32_C(0x7E00);
    }
    return (uint16_t)((sign_bit >> 16) | magnitude);
}
#ifdef TEST_MAIN
#include <stdio.h>
/* Tiny manual check, only built when TEST_MAIN is defined: converts a
 * float to half precision and back, then prints the original value next
 * to the round-tripped one. */
int main(void) {
    float original = 1.2345;
    float restored = from_half(to_half(original));
    printf("%f %f\n", original, restored);
    return 0;
}
#endif

5
fp16.h Normal file
View File

@@ -0,0 +1,5 @@
/* Half-precision (FP16) <-> single-precision conversion helpers.
 *
 * The guard must be #ifndef: with #ifdef the body is skipped whenever
 * FP16_h is not yet defined, so the prototypes below were never
 * declared to the files including this header. */
#ifndef FP16_h
#define FP16_h

#include <stdint.h> /* uint16_t: keeps the header self-contained. */

/* Convert the half-precision value with raw bits 'h' to a float. */
float from_half(uint16_t h);

/* Convert 'f' to half precision, returned as its raw 16 bits. */
uint16_t to_half(float f);

#endif

View File

@@ -169,8 +169,9 @@ void gguf_tools_show(const char *filename) {
} }
/* Read a Mixtral MoE model and creates a new non-MoE GGUF file based /* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
* on the weights of the expert with id 'expert_id'. */ * on the weights of the experts with IDs in the array of 'experts_id'.
void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) { * The array must contain 32 integers, one for each layer. */
void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
gguf_ctx *mixtral = gguf_init(mixtral_filename); gguf_ctx *mixtral = gguf_init(mixtral_filename);
if (mixtral == NULL) { if (mixtral == NULL) {
perror("Opening Mixtral file"); perror("Opening Mixtral file");
@@ -238,6 +239,20 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
/* The tensor is a feed-forward tensor? We want to copy only /* The tensor is a feed-forward tensor? We want to copy only
* the ones of our expert ID. */ * the ones of our expert ID. */
if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) { if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
/* Extract which block this FFN belongs to. */
int block;
assert(memcmp(tn,"blk.",4) == 0); // Must start with blk.<block>
char *p = strchr(tn+4,'.');
assert(p != NULL);
*p = 0;
block = atoi(tn+4);
*p = '.';
assert(block >= 0 && block < 32);
/* Now that we have the block, we can select the corresponding
* expert ID we want to use for this block. */
int expert_id = experts_id[block];
char match[32]; char match[32];
snprintf(match,sizeof(match),".%d.weight",expert_id); snprintf(match,sizeof(match),".%d.weight",expert_id);
char *match_ptr = strstr(tn,match); char *match_ptr = strstr(tn,match);
@@ -283,7 +298,8 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
/* Finally, append the tensors weights. */ /* Finally, append the tensors weights. */
for (uint32_t j = 0; j < num_tensors; j++) { for (uint32_t j = 0; j < num_tensors; j++) {
printf("Writing tensor %s\n", tensors[j].dest_name); printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name,
(int)tensors[j].orig_info.namelen, tensors[j].orig_info.name);
if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data, if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
tensors[j].orig_info.bsize) == 0) tensors[j].orig_info.bsize) == 0)
{ {
@@ -300,7 +316,9 @@ void gguf_tools_usage(const char *progname) {
printf("Usage: %s <subcommand> [options...]\n" printf("Usage: %s <subcommand> [options...]\n"
"Subcommands:\n" "Subcommands:\n"
" show <filename> -- show GGUF model keys and tensors.\n" " show <filename> -- show GGUF model keys and tensors.\n"
" split-mixtral <id> mixtral.gguf out.gguf -- extract expert.\n" " split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
"Example:\n"
" split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n"
, progname); , progname);
exit(1); exit(1);
} }
@@ -311,7 +329,22 @@ int main(int argc, char **argv) {
if (!strcmp(argv[1],"show") && argc == 3) { if (!strcmp(argv[1],"show") && argc == 3) {
gguf_tools_show(argv[2]); gguf_tools_show(argv[2]);
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) { } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]); int experts[32];
size_t elen = strlen(argv[2]);
/* Fill one expert ID per layer: each character of argv[2] must be a
 * digit in the 0-7 range, otherwise the program exits with an error. */
for (size_t j = 0; j < 32; j++) {
    if (j < elen) {
        experts[j] = argv[2][j] - '0';
        if (experts[j] < 0 || experts[j] > 7) {
            fprintf(stderr,"Invalid expert ID: %d\n", experts[j]);
            exit(1);
        }
    } else {
        /* If there aren't 32 digits in the input, use the last
         * one repeated up to the last layer. The test must be
         * j > 0 (not j > 1), otherwise a single-digit input gets
         * expert 0, instead of its only digit, from layer 1 on.
         * Only an empty input (j == 0) has nothing to repeat,
         * and falls back to expert 0. */
        experts[j] = j > 0 ? experts[j-1] : 0;
    }
}
gguf_tools_split_mixtral(experts,argv[3],argv[4]);
} else { } else {
gguf_tools_usage(argv[0]); gguf_tools_usage(argv[0]);
} }

View File

@@ -6,6 +6,7 @@
*/ */
#ifndef GGUFLIB_H #ifndef GGUFLIB_H
#define GGUFLIB_H
#include <stdint.h> #include <stdint.h>