mirror of
https://github.com/antirez/gguf-tools.git
synced 2025-09-17 02:28:07 +08:00
FP16 added. Split-mixtral improved.
This commit is contained in:
4
Makefile
4
Makefile
@@ -1,7 +1,7 @@
|
|||||||
all: gguf-tools
|
all: gguf-tools
|
||||||
|
|
||||||
gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h
|
gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h
|
||||||
$(CC) gguf-tools.c gguflib.c sds.c \
|
$(CC) gguf-tools.c gguflib.c sds.c fp16.c \
|
||||||
-g -ggdb -Wall -W -pedantic -O2 -o gguf-tools
|
-g -ggdb -Wall -W -pedantic -O2 -o gguf-tools
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
14
README.md
14
README.md
@@ -1,6 +1,18 @@
|
|||||||
# GGUF tools
|
# GGUF tools
|
||||||
|
|
||||||
Todo... still not clear what this is going to be.
|
This is a work in progress library to manipulate GGUF files.
|
||||||
|
The program 'gguf-tools' uses the library to implement both useful and
|
||||||
|
useless stuff, to show the library usage.
|
||||||
|
|
||||||
|
gguf-tools show file.gguf
|
||||||
|
|
||||||
|
shows detailed info about the GGUF file. This will include all the key-value pairs, including arrays, and detailed tensor information. Tensor offsets will be relative to the start *of the file*, not the start of the data section like in the GGUF format (absolute file offsets are more useful and simpler to use).
|
||||||
|
|
||||||
|
gguf-tools split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf
|
||||||
|
|
||||||
|
Extracts a 7B model `out.gguf` from Mixtral 7B MoE using the specified expert ID for each layer (there are 32 digits in the sequence 652..., one per layer).
|
||||||
|
|
||||||
|
Note that split-mixtral is quite useless, as models obtained in this way will not perform any useful action. This is just an experiment and a non-trivial task to show how to use the library. Likely it will be removed soon, once I have more interesting and useful examples to show.
|
||||||
|
|
||||||
## Specification documents
|
## Specification documents
|
||||||
|
|
||||||
|
85
fp16.c
Normal file
85
fp16.c
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
#include <stdint.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
/* This code comes originally from:
|
||||||
|
* https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h
|
||||||
|
*
|
||||||
|
* The original code is MIT licensed. */
|
||||||
|
|
||||||
|
/* Reinterprets the 32-bit pattern 'w' as a float: the bits are stored
 * through the integer member of a union and read back through the
 * float member, without any numeric conversion. */
static inline float fp32_from_bits(uint32_t w) {
    const union {
        uint32_t as_bits;
        float as_value;
    } pun = { .as_bits = w };
    return pun.as_value;
}
|
||||||
|
|
||||||
|
/* Returns the raw 32-bit pattern of the float 'f': the value is stored
 * through the float member of a union and read back through the
 * integer member, without any numeric conversion. */
static inline uint32_t fp32_to_bits(float f) {
    const union {
        float as_value;
        uint32_t as_bits;
    } pun = { .as_value = f };
    return pun.as_bits;
}
|
||||||
|
|
||||||
|
/* Converts the 16-bit half-precision bit pattern 'h' into a float.
 *
 * Two candidate results are always computed, one valid when the half
 * exponent field is non-zero and one valid when it is zero; the right
 * one is then selected and the sign bit is put back. */
float from_half(uint16_t h) {
    /* Place the 16 half bits in the upper half of a 32-bit word, so the
     * sign lands on bit 31, where a float keeps it. */
    const uint32_t widened = (uint32_t) h << 16;
    const uint32_t sign_bit = widened & UINT32_C(0x80000000);

    /* Doubling shifts the sign bit out: the 5 exponent bits now sit in
     * bits 31..27 and the 10 mantissa bits in bits 26..17. */
    const uint32_t unsigned_x2 = widened + widened;

    /* Candidate for a non-zero exponent field: move exponent and
     * mantissa to their float positions, add 0xE0 to the float exponent
     * field, then multiply by 2^-112. */
    const uint32_t exponent_adjust = UINT32_C(0xE0) << 23;
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float rescale = 0x1.0p-112f;
#else
    /* Same 2^-112 constant, spelled as a bit pattern for compilers
     * without hexadecimal floating point literals. */
    const float rescale = fp32_from_bits(UINT32_C(0x7800000));
#endif
    const float as_normal =
        fp32_from_bits((unsigned_x2 >> 4) + exponent_adjust) * rescale;

    /* Candidate for a zero exponent field: OR the mantissa bits into a
     * float whose exponent field is 126 (a value in [0.5, 1)), then
     * subtract 0.5. */
    const uint32_t half_one_exponent = UINT32_C(126) << 23;
    const float half_one = 0.5f;
    const float as_subnormal =
        fp32_from_bits((unsigned_x2 >> 17) | half_one_exponent) - half_one;

    /* unsigned_x2 < 2^27 means all five exponent bits are zero. */
    const uint32_t subnormal_limit = UINT32_C(1) << 27;
    uint32_t magnitude_bits;
    if (unsigned_x2 < subnormal_limit) {
        magnitude_bits = fp32_to_bits(as_subnormal);
    } else {
        magnitude_bits = fp32_to_bits(as_normal);
    }

    return fp32_from_bits(sign_bit | magnitude_bits);
}
|
||||||
|
|
||||||
|
/* Converts the float 'f' into a 16-bit half-precision bit pattern.
 *
 * Any NaN input yields 0x7E00 combined with the input sign bit. For all
 * other inputs the half exponent and mantissa are extracted from the
 * bits of a float sum built from |f| and a bias derived from the
 * exponent of 'f'. */
uint16_t to_half(float f) {
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
    const float up_scale = 0x1.0p+112f;
    const float down_scale = 0x1.0p-110f;
#else
    /* Same two constants, spelled as bit patterns for compilers without
     * hexadecimal floating point literals. */
    const float up_scale = fp32_from_bits(UINT32_C(0x77800000));
    const float down_scale = fp32_from_bits(UINT32_C(0x08800000));
#endif
    /* The two multiplications are kept separate and in this order. */
    const float scaled_abs = (fabsf(f) * up_scale) * down_scale;

    const uint32_t raw = fp32_to_bits(f);
    const uint32_t sign_bit = raw & UINT32_C(0x80000000);
    /* Doubling drops the sign: the 8 exponent bits are now bits 31..24. */
    const uint32_t raw_x2 = raw + raw;

    /* Exponent-only part of the input, clamped from below. */
    const uint32_t exponent_x2 = raw_x2 & UINT32_C(0xFF000000);
    const uint32_t floor_x2 = UINT32_C(0x71000000);
    const uint32_t clamped_x2 = (exponent_x2 < floor_x2) ? floor_x2
                                                         : exponent_x2;

    /* Add the bias float to the scaled magnitude, then read the half
     * exponent and mantissa fields out of the bits of the sum. */
    const float biased =
        fp32_from_bits((clamped_x2 >> 1) + UINT32_C(0x07800000)) + scaled_abs;
    const uint32_t biased_bits = fp32_to_bits(biased);
    const uint32_t half_exponent = (biased_bits >> 13) & UINT32_C(0x00007C00);
    const uint32_t half_mantissa = biased_bits & UINT32_C(0x00000FFF);
    uint32_t magnitude = half_exponent + half_mantissa;

    /* Exponent all ones with a non-zero mantissa: the input is a NaN. */
    if (raw_x2 > UINT32_C(0xFF000000)) {
        magnitude = UINT32_C(0x7E00);
    }

    return (uint16_t) ((sign_bit >> 16) | magnitude);
}
|
||||||
|
|
||||||
|
#ifdef TEST_MAIN
#include <stdio.h>

/* Smoke test, built only when TEST_MAIN is defined: converts one value
 * to half precision and back, then prints the original value next to
 * the round-tripped one. */
int main(void) {
    float original = 1.2345;
    float roundtrip = from_half(to_half(original));

    printf("%f %f\n", original, roundtrip);
    return 0;
}
#endif
|
5
fp16.h
Normal file
5
fp16.h
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
/* FP16 (half precision) <-> float conversion helpers, implemented in
 * fp16.c. */
#ifndef FP16_h
#define FP16_h

#include <stdint.h> /* uint16_t, used by the prototypes below. */

/* Converts the 16-bit half-precision bit pattern 'h' into a float. */
float from_half(uint16_t h);

/* Converts the float 'f' into a 16-bit half-precision bit pattern. */
uint16_t to_half(float f);

#endif
|
43
gguf-tools.c
43
gguf-tools.c
@@ -169,8 +169,9 @@ void gguf_tools_show(const char *filename) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
|
/* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
|
||||||
* on the weights of the expert with id 'expert_id'. */
|
* on the weights of the experts with IDs in the array of 'experts_id'.
|
||||||
void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) {
|
* The array must contain 32 integers, one for each layer. */
|
||||||
|
void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
|
||||||
gguf_ctx *mixtral = gguf_init(mixtral_filename);
|
gguf_ctx *mixtral = gguf_init(mixtral_filename);
|
||||||
if (mixtral == NULL) {
|
if (mixtral == NULL) {
|
||||||
perror("Opening Mixtral file");
|
perror("Opening Mixtral file");
|
||||||
@@ -238,6 +239,20 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
|
|||||||
/* The tensor is a feed-forward tensor? We want to copy only
|
/* The tensor is a feed-forward tensor? We want to copy only
|
||||||
* the ones of our expert ID. */
|
* the ones of our expert ID. */
|
||||||
if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
|
if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
|
||||||
|
/* Extract which block this FFN belongs. */
|
||||||
|
int block;
|
||||||
|
assert(memcmp(tn,"blk.",4) == 0); // Must start with blk.<block>
|
||||||
|
char *p = strchr(tn+4,'.');
|
||||||
|
assert(p != NULL);
|
||||||
|
*p = 0;
|
||||||
|
block = atoi(tn+4);
|
||||||
|
*p = '.';
|
||||||
|
assert(block >= 0 && block < 32);
|
||||||
|
|
||||||
|
/* Now that we have the block, we can select the corresponding
|
||||||
|
* expert ID we want to use for this block. */
|
||||||
|
int expert_id = experts_id[block];
|
||||||
|
|
||||||
char match[32];
|
char match[32];
|
||||||
snprintf(match,sizeof(match),".%d.weight",expert_id);
|
snprintf(match,sizeof(match),".%d.weight",expert_id);
|
||||||
char *match_ptr = strstr(tn,match);
|
char *match_ptr = strstr(tn,match);
|
||||||
@@ -283,7 +298,8 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
|
|||||||
|
|
||||||
/* Finally, append the tensors weights. */
|
/* Finally, append the tensors weights. */
|
||||||
for (uint32_t j = 0; j < num_tensors; j++) {
|
for (uint32_t j = 0; j < num_tensors; j++) {
|
||||||
printf("Writing tensor %s\n", tensors[j].dest_name);
|
printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name,
|
||||||
|
(int)tensors[j].orig_info.namelen, tensors[j].orig_info.name);
|
||||||
if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
|
if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
|
||||||
tensors[j].orig_info.bsize) == 0)
|
tensors[j].orig_info.bsize) == 0)
|
||||||
{
|
{
|
||||||
@@ -300,7 +316,9 @@ void gguf_tools_usage(const char *progname) {
|
|||||||
printf("Usage: %s <subcommand> [options...]\n"
|
printf("Usage: %s <subcommand> [options...]\n"
|
||||||
"Subcommands:\n"
|
"Subcommands:\n"
|
||||||
" show <filename> -- show GGUF model keys and tensors.\n"
|
" show <filename> -- show GGUF model keys and tensors.\n"
|
||||||
" split-mixtral <id> mixtral.gguf out.gguf -- extract expert.\n"
|
" split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
|
||||||
|
"Example:\n"
|
||||||
|
" split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n"
|
||||||
, progname);
|
, progname);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
@@ -311,7 +329,22 @@ int main(int argc, char **argv) {
|
|||||||
if (!strcmp(argv[1],"show") && argc == 3) {
|
if (!strcmp(argv[1],"show") && argc == 3) {
|
||||||
gguf_tools_show(argv[2]);
|
gguf_tools_show(argv[2]);
|
||||||
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
|
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
|
||||||
gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]);
|
int experts[32];
|
||||||
|
size_t elen = strlen(argv[2]);
|
||||||
|
for (size_t j = 0; j < 32; j++) {
|
||||||
|
if (j < elen) {
|
||||||
|
experts[j] = argv[2][j] - '0';
|
||||||
|
if (experts[j] < 0 || experts[j] > 7) {
|
||||||
|
fprintf(stderr,"Invalid expert ID: %d\n", experts[j]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* If there aren't 32 digits in the input, use the last
|
||||||
|
* one repeated up to the last layer. */
|
||||||
|
experts[j] = j > 1 ? experts[j-1] : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
gguf_tools_split_mixtral(experts,argv[3],argv[4]);
|
||||||
} else {
|
} else {
|
||||||
gguf_tools_usage(argv[0]);
|
gguf_tools_usage(argv[0]);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user