Sometimes it's useful to get an overview of how tensors change when using different quantization formats. For example:

    diff -u <(gguf-tools show --diffable ggml-model-bf16.gguf) \
            <(gguf-tools show --diffable ggml-model-Q6_K.gguf) | less

now produces nice, clean output. Without this change, every line would differ because of the file offsets and byte sizes, so `diff -u` would produce one gigantic unreadable chunk.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <math.h>
#include <inttypes.h>

#include "gguflib.h"
#include "sds.h"
#include "fp16.h"

/* Global options that can be used by all the subcommands. */
struct {
    int verbose;    // --verbose option
    int diffable;   // --diffable option
} Opt = {0};

/* ========================== Utility functions ============================ */

/* Glob-style pattern matching. Return 1 on match, 0 otherwise. */
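/* Supported metacharacters: '*' (any sequence), '?' (any single character),
 * '[...]' character classes with optional '^' negation and ranges, and '\'
 * to escape the next character. */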
int strmatch(const char *pattern, int patternLen,
             const char *string, int stringLen, int nocase)
{
    while(patternLen && stringLen) {
        switch(pattern[0]) {
        case '*':
            while (patternLen && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            while(stringLen) {
                if (strmatch(pattern+1, patternLen-1,
                             string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
            break;
        case '?':
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            pattern++;
            patternLen--;
            not = pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen == 0) {
                    pattern--;
                    patternLen++;
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((int)pattern[0]) == tolower((int)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((int)pattern[0]) != tolower((int)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            while(*pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}

/* ========================== 'show' subcommand ============================= */

void gguf_tools_show(const char *filename) {
    gguf_ctx *ctx = gguf_open(filename);
    if (ctx == NULL) {
        perror(filename);
        exit(1);
    }

    /* Show general information about the neural network. */
    printf("%s (ver %d): %llu key-value pairs, %llu tensors\n",
        filename,
        (int)ctx->header->version,
        (unsigned long long)ctx->header->metadata_kv_count,
        (unsigned long long)ctx->header->tensor_count);

    /* Show all the key-value pairs. */
    gguf_key key;
    while (gguf_get_key(ctx,&key)) {
        printf("%.*s: [%s] ", (int)key.namelen, key.name, gguf_get_value_type_name(key.type));
        gguf_print_value(ctx,key.type,key.val,Opt.verbose);
        printf("\n");
    }

    /* Show all the tensors. */
    gguf_tensor tensor;
    uint64_t params = 0;
    while (gguf_get_tensor(ctx,&tensor)) {
        printf("%s tensor %.*s",
            gguf_get_tensor_type_name(tensor.type),
            (int)tensor.namelen,
            tensor.name);
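        /* With --diffable we omit file offsets and byte sizes, so dumps of
         * the same model in different quantization formats can be compared
         * with diff(1). */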
        if (!Opt.diffable)
            printf(" @%" PRIu64, tensor.offset);
        printf(", %" PRIu64 " weights, dims ", tensor.num_weights);
        for (uint32_t j = 0; j < tensor.ndim; j++) {
            printf("%s%" PRIu64 "",(j == 0) ? "[" : ",", tensor.dim[j]);
        }
        printf("]");
        if (!Opt.diffable)
            printf(", %" PRIu64 " bytes", tensor.bsize);
        printf("\n");

        params += tensor.num_weights;
    }
    printf("gguf-tools.info.parameters: %.02fB\n",
        (double)params/1000000000);
    return;
}

/* ======================= 'split-mixtral' subcommand ======================= */

/* Read a Mixtral MoE model and create a new non-MoE GGUF file based
 * on the weights of the experts whose IDs are listed in the 'experts_id'
 * array. The array must contain 32 integers, one for each layer. */
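/* For example, if experts_id[0] is 6, the layer 0 ("blk.0") feed-forward
 * tensors are taken from expert 6, experts_id[1] selects the expert for
 * layer 1, and so forth. */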
void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
    gguf_ctx *mixtral = gguf_open(mixtral_filename);
    if (mixtral == NULL) {
        perror(mixtral_filename);
        exit(1);
    }

    gguf_ctx *output = gguf_create(output_filename, GGUF_NONE);
    if (output == NULL) {
        perror(output_filename);
        exit(1);
    }

    /* To start, copy all the key-value items, excluding the ones
     * related to the experts. */
    gguf_key key;
    while (gguf_get_key(mixtral,&key)) {
        char keybuf[1024];
        snprintf(keybuf,sizeof(keybuf),"%.*s",(int)key.namelen, key.name);

        int skip = strstr(keybuf,"llama.expert_") != NULL;

        if (!skip)
            printf("Copying %s\n", keybuf);
        uint64_t value_start_offset = mixtral->off;
        void *value = mixtral->data+mixtral->off;
        // Just consume the value without doing anything with it.
        gguf_do_with_value(mixtral,key.type,key.val,NULL,0,0,NULL);
        uint64_t value_len = mixtral->off - value_start_offset;

        // Now append the value to the output model.
        if (!skip)
            gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
    }

    /* Now it's time to copy the tensors. We need to copy all the shared
     * tensors (between the different experts), but only the set of
     * expert-specific tensors corresponding to the expert ID the user
     * wants to extract. */
    struct tensor_to_copy {
        sds dest_name;          // Tensor name in the output file.
        gguf_tensor orig_info;  // Original tensor info.
        uint64_t dest_offset;   // Destination offset in output file.
        uint64_t size;          // Tensor total bytes.
    };

    uint32_t num_tensors = 0;
    uint32_t max_tensors = 2048;

    struct tensor_to_copy *tensors =
        malloc(sizeof(struct tensor_to_copy)*max_tensors);
    if (tensors == NULL) {
        perror("Allocating tensors info array");
        exit(1);
    }

    /* Scan Mixtral tensors looking for the ones we need to copy
     * into the output model. */
    gguf_tensor tensor_info;
    while (gguf_get_tensor(mixtral,&tensor_info)) {
        assert(num_tensors < max_tensors);

        char tn[1024]; // Tensor name as null terminated string.
        snprintf(tn,sizeof(tn),"%.*s",(int)tensor_info.namelen, tensor_info.name);

        /* Is this a feed-forward tensor? We only want to copy the ones
         * belonging to our selected expert ID. */
        if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
            /* Extract the block this FFN belongs to. */
            int block;
            assert(memcmp(tn,"blk.",4) == 0); // Must start with blk.<block>
            char *p = strchr(tn+4,'.');
            assert(p != NULL);
            *p = 0;
            block = atoi(tn+4);
            *p = '.';
            assert(block >= 0 && block < 32);

            /* Now that we have the block, we can select the corresponding
             * expert ID we want to use for this block. */
            int expert_id = experts_id[block];

            char match[32];
            snprintf(match,sizeof(match),".%d.weight",expert_id);
            char *match_ptr = strstr(tn,match);
            if (match_ptr == NULL) {
                printf("Skipping tensor %s\n", tn);
                continue; // Skip this tensor.
            }

            /* We need to remove the ".<id>" part from the name. */
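            /* For instance a name like "blk.0.ffn_gate.3.weight" becomes
             * "blk.0.ffn_gate.weight". */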
            size_t taillen = strlen(match_ptr);
            memmove(match_ptr,match_ptr+2,taillen+1);
        }

        /* Create the entry for this tensor. Later we will scan all our
         * entries and append the data to our output tensor. */
        tensors[num_tensors].dest_name = sdsnew(tn);
        if (tensors[num_tensors].dest_name == NULL) {
            perror("Allocating dest tensor name");
            exit(1);
        }
        tensors[num_tensors].orig_info = tensor_info;
        tensors[num_tensors].size = tensor_info.bsize;
        num_tensors++;
    }

    /* Now we need to set the offset for our destination tensors. As
     * we calculate the offsets, we can emit the tensors information
     * section as well. */
    uint64_t tensor_off = 0; // Tensor offsets are relative to data section,
                             // so we start at offset 0.
    for (uint32_t j = 0; j < num_tensors; j++) {
        /* Align offset. */
        tensor_off += gguf_get_alignment_padding(mixtral->alignment,tensor_off);
        tensors[j].dest_offset = tensor_off;
        if (gguf_append_tensor_info(output,tensors[j].dest_name,strlen(tensors[j].dest_name),tensors[j].orig_info.ndim,tensors[j].orig_info.dim,tensors[j].orig_info.type,tensor_off) == 0)
        {
            perror("Failed to append tensor info");
            exit(1);
        }
        tensor_off += tensors[j].orig_info.bsize;
    }
    printf("Output file: after writing tensors info, file size is: %" PRIu64 "\n", output->size);

    /* Finally, append the tensors weights. */
    for (uint32_t j = 0; j < num_tensors; j++) {
        printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name,
            (int)tensors[j].orig_info.namelen, tensors[j].orig_info.name);
        if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
                                    tensors[j].orig_info.bsize) == 0)
        {
            perror("Failed to append tensor data");
            exit(1);
        }
    }
    exit(0);
}

/* ====================== 'inspect-weights' subcommand ====================== */

void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
    gguf_ctx *ctx = gguf_open(filename);
    if (ctx == NULL) {
        perror(filename);
        exit(1);
    }

    /* Skip all the key-value pairs. */
    gguf_skip_key_values_section(ctx);

    /* Look for the tensor with the specified name. */
    size_t tnamelen = strlen(tname);
    gguf_tensor tensor;
    while (gguf_get_tensor(ctx,&tensor)) {
        if (tensor.namelen != tnamelen ||
            memcmp(tensor.name,tname,tnamelen)) continue;
        break; // Matching tensor found!
    }

    if (tensor.name == NULL) {
        fprintf(stderr, "A tensor with the specified name was not found\n");
        exit(1);
    }

    float *weights = gguf_tensor_to_float(&tensor);
    if (weights == NULL) {
        if (errno == EINVAL) {
            fprintf(stderr,"Unsupported tensor type: %s\n",
                gguf_get_tensor_type_name(tensor.type));
        } else {
            fprintf(stderr,"Out of memory\n");
        }
        exit(1);
    }
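    /* Compute how many consecutive weights make up each nested block:
     * strides[ndim-1] is 1 (a single weight), strides[ndim-2] equals
     * dim[0] (the innermost row length), and so on. These counts are
     * used below to decide where to open and close brackets while
     * printing. */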
    uint64_t strides[GGUF_TENSOR_MAX_DIM] = {0};
    strides[tensor.ndim-1] = 1;
    for (int j = tensor.ndim - 2; j >= 0; j--) {
        strides[j] = tensor.dim[tensor.ndim - 2 - j] * strides[j + 1];
    }

    const int ident = 4;
    uint64_t j = 0;
    int broke = 1;
    while (j < tensor.num_weights) {
        int last = j + 1 == tensor.num_weights;
        for (int k = 0; k < (int) tensor.ndim - 1; k++) {
            if (j % strides[k] == 0) {
                printf("%*s\n", k * ident, "[");
            }
        }
        if (broke) {
            printf("%*s", tensor.ndim * ident, "");
        }
        printf("%f%s", weights[j], last ? "" : ", ");
        broke = 0;
        j++;
        for (int k = (int) tensor.ndim - 2; k >= 0; k--) {
            if (j % strides[k] == 0) {
                if (!broke) {
                    broke = 1;
                    printf("\n");
                }
                printf("%*s%s\n", k * ident, "]", last ? "" : ",");
            }
        }
        if (!broke && j % 4 == 0) {
            broke = 1;
            printf("\n");
        }
        if (j == count) break;
    }
    if (!broke) printf("\n");
    free(weights);
    return;
}

/* ========================== 'compare' subcommand ========================== */

/* Given two tensors of the same length, return the average difference
 * of their weights, as a percentage.
 *
 * The difference is calculated as follows: first the average of the absolute
 * values of all the weights in the two vectors is computed. Then, for each
 * pair of corresponding weights, we calculate their difference as a
 * percentage of that average magnitude (taken as 100%). The function returns
 * the average of these percentages over all the pairs.
 *
 * Returns 1 on success, 0 if one or both of the provided tensors can't be
 * dequantized. */
int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) {
    float *weights1 = gguf_tensor_to_float(t1);
    float *weights2 = gguf_tensor_to_float(t2);
    if (weights1 == NULL || weights2 == NULL) {
        free(weights1);
        free(weights2);
        return 0;
    }

    /* Compute the average magnitude of the weights. */
    double tot_mag = 0;
    for (uint64_t j = 0; j < t1->num_weights; j++) {
        tot_mag += fabs(weights1[j]);
        tot_mag += fabs(weights2[j]);
    }
    double avg_mag = tot_mag/(t1->num_weights*2);

    /* Compute the average % difference of the weights. */
    double tot_diff = 0;
    for (uint64_t j = 0; j < t1->num_weights; j++)
        tot_diff += fabs(weights1[j]-weights2[j]);
    double avg_diff = tot_diff / t1->num_weights;

    /* Multiply by 75 to normalize the difference of a
     * random variable between -N and +N to 0 - 100% */
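    /* (If the weights were independent and uniformly distributed in [-N,N],
     * the expected absolute difference of a pair would be 2N/3 while the
     * average magnitude is N/2, a ratio of 4/3: multiplying by 75 maps
     * that ratio to 100%.) */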
    *diff = avg_diff / avg_mag * 75;

    free(weights1);
    free(weights2);
    return 1;
}

void gguf_tools_compare(const char *file1, const char *file2) {
    gguf_ctx *ctx1 = gguf_open(file1);
    if (ctx1 == NULL) {
        perror(file1);
        exit(1);
    }

    gguf_ctx *ctx2 = gguf_open(file2);
    if (ctx2 == NULL) {
        perror(file2);
        exit(1);
    }

    /* Skip all the key-value pairs. */
    gguf_skip_key_values_section(ctx1);

    /* For each tensor of the first net... */
    gguf_tensor tensor1, tensor2;
    while (gguf_get_tensor(ctx1,&tensor1)) {
        gguf_skip_key_values_section(ctx2);
        while (gguf_get_tensor(ctx2,&tensor2)) {
            /* Search for a tensor with the same name. */
            if (tensor2.namelen == tensor1.namelen &&
                memcmp(tensor2.name,tensor1.name,tensor1.namelen) == 0)
            {
                printf("[%.*s]: ", (int)tensor1.namelen, tensor1.name);
                fflush(stdout);
                if (tensor1.num_weights != tensor2.num_weights) {
                    printf("size mismatch\n");
                } else {
                    double diff;
                    if (tensors_avg_diff(&tensor1, &tensor2, &diff)) {
                        printf("avg weights difference: %f%%\n", diff);
                    } else {
                        printf("dequantization function missing...\n");
                    }
                }
            }
        }
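        /* Rewind the second file so its tensors can be scanned again for
         * the next tensor of the first file. */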
        gguf_rewind(ctx2);
    }
}

/* ======================= Main and CLI options parsing ===================== */

void gguf_tools_usage(const char *progname) {
    printf("Usage: %s <subcommand> [arguments...] [options...]\n"
           "Subcommands:\n"
           " show <filename> -- show GGUF model keys and tensors.\n"
           " inspect-tensor <filename> <tensor-name> [count] -- show tensor weights.\n"
           " compare <file1> <file2> -- avg weights diff for matching tensor names.\n"
           " split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
           "Options:\n"
           " --verbose :With 'show', print full arrays (e.g. token lists)\n"
           " --diffable :Don't show tensor file offsets and sizes\n"
           "Example:\n"
           " split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n"
           , progname);
    exit(1);
}

int main(int argc, char **argv) {
    if (argc < 3) gguf_tools_usage(argv[0]);

    /* Parse options before getting into subcommands parsing. */
    for (int j = 1; j < argc; j++) {
        /* Every time we find an option, we try to parse it
         * and set the used argv[] entries to NULL. Later we remove
         * the NULL entries. This way '--options' can be anywhere,
         * making the tool simpler to use. */
        if (!strcmp(argv[j],"--verbose")) {
            argv[j] = NULL;
            argc--;
            Opt.verbose = 1;
        }
        if (!strcmp(argv[j],"--diffable")) {
            argv[j] = NULL;
            argc--;
            Opt.diffable = 1;
        }
    }

    /* Strip empty elements. */
    for (int j = 1; j < argc; j++) {
        if (argv[j] == NULL) {
            memmove(argv+j, argv+j+1, sizeof(char*) * (argc-j));
        }
    }

    if (!strcmp(argv[1],"show") && argc == 3) {
        gguf_tools_show(argv[2]);
    } else if (!strcmp(argv[1],"compare") && argc == 4) {
        gguf_tools_compare(argv[2],argv[3]);
    } else if (!strcmp(argv[1],"inspect-tensor") && (argc == 4 || argc == 5)) {
        gguf_tools_inspect_weights(argv[2],argv[3],
                                   argc == 5 ? atoi(argv[4]) : 0);
    } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
        int experts[32];
        size_t elen = strlen(argv[2]);
        for (size_t j = 0; j < 32; j++) {
            if (j < elen) {
                experts[j] = argv[2][j] - '0';
                if (experts[j] < 0 || experts[j] > 7) {
                    fprintf(stderr,"Invalid expert ID: %d\n", experts[j]);
                    exit(1);
                }
            } else {
                /* If there aren't 32 digits in the input, use the last
                 * one repeated up to the last layer. */
                experts[j] = j > 0 ? experts[j-1] : 0;
            }
        }
        gguf_tools_split_mixtral(experts,argv[3],argv[4]);
    } else {
        gguf_tools_usage(argv[0]);
    }
    return 0;
}