mirror of
https://github.com/antirez/gguf-tools.git
synced 2025-09-17 19:08:07 +08:00
Q8_0 dequantization.
This commit is contained in:
2
fp16.h
2
fp16.h
@@ -1,6 +1,6 @@
|
|||||||
/* See fp16.c. */
|
/* See fp16.c. */
|
||||||
|
|
||||||
#ifdef FP16_h
|
#ifndef FP16_h
|
||||||
#define FP16_h
|
#define FP16_h
|
||||||
float from_half(uint16_t h);
|
float from_half(uint16_t h);
|
||||||
uint16_t to_half(float f);
|
uint16_t to_half(float f);
|
||||||
|
63
gguf-tools.c
63
gguf-tools.c
@@ -3,9 +3,11 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <errno.h>
|
||||||
|
|
||||||
#include "gguflib.h"
|
#include "gguflib.h"
|
||||||
#include "sds.h"
|
#include "sds.h"
|
||||||
|
#include "fp16.h"
|
||||||
|
|
||||||
/* ========================== Utility functions ============================ */
|
/* ========================== Utility functions ============================ */
|
||||||
|
|
||||||
@@ -310,15 +312,63 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con
|
|||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) {
|
||||||
|
gguf_ctx *ctx = gguf_init(filename);
|
||||||
|
if (ctx == NULL) {
|
||||||
|
perror("Opening GGUF file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip all the key-value pairs. */
|
||||||
|
gguf_skip_key_values_section(ctx);
|
||||||
|
|
||||||
|
/* Look for the tensor with the specified name. */
|
||||||
|
size_t tnamelen = strlen(tname);
|
||||||
|
gguf_tensor tensor;
|
||||||
|
while (gguf_get_tensor(ctx,&tensor)) {
|
||||||
|
if (tensor.namelen != tnamelen ||
|
||||||
|
memcmp(tensor.name,tname,tnamelen)) continue;
|
||||||
|
break; // Matching tensor found!
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tensor.name == NULL) {
|
||||||
|
fprintf(stderr, "A tensor with the specified name was not found\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
float *weights = gguf_tensor_to_float(&tensor);
|
||||||
|
if (weights == NULL) {
|
||||||
|
if (errno == EINVAL) {
|
||||||
|
fprintf(stderr,"Unsupported tensor type: %s\n",
|
||||||
|
gguf_get_tensor_type_name(tensor.type));
|
||||||
|
} else {
|
||||||
|
fprintf(stderr,"Out of memory\n");
|
||||||
|
}
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t j = 0;
|
||||||
|
while (j < tensor.num_weights) {
|
||||||
|
printf("%f, ", weights[j]);
|
||||||
|
j++;
|
||||||
|
if (j % 4 == 0) printf("\n");
|
||||||
|
if (j == count) break;
|
||||||
|
}
|
||||||
|
if (j % 4 != 0) printf("\n");
|
||||||
|
free(weights);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* ======================= Main and CLI options parsing ===================== */
|
/* ======================= Main and CLI options parsing ===================== */
|
||||||
|
|
||||||
/* Show the command line help, then exit with an error code. */
void gguf_tools_usage(const char *progname) {
    fprintf(stdout,
        "Usage: %s <subcommand> [options...]\n"
        "Subcommands:\n"
        " show <filename> -- show GGUF model keys and tensors.\n"
        " inspect-tensor <filename> <tensor-name> [count] -- show tensor weights.\n"
        " split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
        "Example:\n"
        " split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n",
        progname);
    exit(1);
}
|
||||||
@@ -328,6 +378,9 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
if (!strcmp(argv[1],"show") && argc == 3) {
|
if (!strcmp(argv[1],"show") && argc == 3) {
|
||||||
gguf_tools_show(argv[2]);
|
gguf_tools_show(argv[2]);
|
||||||
|
} else if (!strcmp(argv[1],"inspect-tensor") && (argc == 4 || argc == 5)) {
|
||||||
|
gguf_tools_inspect_weights(argv[2],argv[3],
|
||||||
|
argc == 5 ? atoi(argv[4]) : 0);
|
||||||
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
|
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
|
||||||
int experts[32];
|
int experts[32];
|
||||||
size_t elen = strlen(argv[2]);
|
size_t elen = strlen(argv[2]);
|
||||||
|
58
gguflib.c
58
gguflib.c
@@ -10,6 +10,7 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#include "gguflib.h"
|
#include "gguflib.h"
|
||||||
|
#include "fp16.h"
|
||||||
|
|
||||||
/* ============================ Low level functions ========================= */
|
/* ============================ Low level functions ========================= */
|
||||||
|
|
||||||
@@ -186,6 +187,14 @@ int gguf_get_key(gguf_ctx *ctx, gguf_key *key) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Skip all the key values pairs in the GGUF files to get to the
|
||||||
|
* tensors information segment. */
|
||||||
|
void gguf_skip_key_values_section(gguf_ctx *ctx) {
|
||||||
|
gguf_key key;
|
||||||
|
while (gguf_get_key(ctx,&key))
|
||||||
|
gguf_do_with_value(ctx,key.type,key.val,NULL,0,0,NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/* Given an offset or a length, returns the padding needed to align it
|
/* Given an offset or a length, returns the padding needed to align it
|
||||||
* to ctx->alignment. */
|
* to ctx->alignment. */
|
||||||
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) {
|
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) {
|
||||||
@@ -219,14 +228,22 @@ void gguf_set_data_offset(gguf_ctx *ctx) {
|
|||||||
* there are still key-value pairs to process before getting into the
|
* there are still key-value pairs to process before getting into the
|
||||||
* tensors section.
|
* tensors section.
|
||||||
*
|
*
|
||||||
* When 0 is returned, we are at the end of the file and as a side
|
* The first time this function is called, as a side effect it will
|
||||||
* effect this function will set the data offset ctx->data_off. */
|
* set ctx->data_off to return tensors with absolute offsets.
|
||||||
|
*
|
||||||
|
* When 0 is returned, the tensor name is set to NULL, so that after
|
||||||
|
* a while() loop scanning tensors for a given condition, the caller
|
||||||
|
* can easily understand if the search terminated because the loop
|
||||||
|
 * was exited or because all the entries were consumed. */
|
||||||
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor) {
|
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor) {
|
||||||
if (ctx->left_tensors == 0 || ctx->left_kv != 0) return 0;
|
if (ctx->left_tensors == 0 || ctx->left_kv != 0) {
|
||||||
|
tensor->name = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* We want to return tensor data with offsets relative to the start
|
/* We want to return tensor data with offsets relative to the start
|
||||||
* of the file, so that the user of the API is able to access tensors
|
* of the file, so that the user of the API is able to access tensors
|
||||||
* as it iterates over them. To do so, we need to perform a fulls
|
* as it iterates over them. To do so, we need to perform a full
|
||||||
* scan if this is the first tensor info we are reading. */
|
* scan if this is the first tensor info we are reading. */
|
||||||
if (ctx->data_off == 0) gguf_set_data_offset(ctx);
|
if (ctx->data_off == 0) gguf_set_data_offset(ctx);
|
||||||
|
|
||||||
@@ -480,3 +497,36 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
|
|||||||
gguf_remap(ctx);
|
gguf_remap(ctx);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ============================ GGUF dequantization ========================= */
|
||||||
|
|
||||||
|
/* Convert the specified tensor (quantized or not) into an array of
|
||||||
|
* floats. The array is allocated with malloc(). If the tensor is already
|
||||||
|
* in FP32 floats format, it is just memcpy()-ed to the destination array.
|
||||||
|
*
|
||||||
|
* On OOM, NULL is returned. If the tensor format is not yet supported,
|
||||||
|
* NULL is returned as well, but errno is set to EINVAL. */
|
||||||
|
float *gguf_tensor_to_float(gguf_tensor *tensor) {
|
||||||
|
struct gguf_tensor_type_features *tf =
|
||||||
|
gguf_get_tensor_type_features(tensor->type);
|
||||||
|
uint64_t block_size = tf->bytes_per_block;
|
||||||
|
float *f = malloc(tensor->num_weights*sizeof(float));
|
||||||
|
if (tensor->type == GUFF_TYPE_Q8_0) {
|
||||||
|
int8_t *block = (int8_t*)tensor->weights_data;
|
||||||
|
uint64_t i = 0;
|
||||||
|
while(i < tensor->num_weights) {
|
||||||
|
/* For each block get the delta and convert all the
|
||||||
|
* weights in the block. */
|
||||||
|
float delta = from_half(*((uint16_t*)block));
|
||||||
|
for (uint32_t j = 0; j < tf->items_per_block; j++) {
|
||||||
|
f[i++] = block[j+2] * delta; // j+2 to skip the delta bytes.
|
||||||
|
if (i == tensor->num_weights) break;
|
||||||
|
}
|
||||||
|
block += block_size; // Go to the next block.
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
errno = EINVAL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return f;
|
||||||
|
}
|
||||||
|
@@ -178,5 +178,7 @@ int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t
|
|||||||
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
|
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
|
||||||
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
|
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
|
||||||
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
|
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
|
||||||
|
void gguf_skip_key_values_section(gguf_ctx *ctx);
|
||||||
|
float *gguf_tensor_to_float(gguf_tensor *tensor);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user