From 7e9c2bd6a75a495b9f3716ec8c239d260e443641 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 26 Dec 2023 09:20:54 +0100 Subject: [PATCH] Better explain the tensor total size math. --- gguflib.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/gguflib.c b/gguflib.c index 5216b2a..6cf5b83 100644 --- a/gguflib.c +++ b/gguflib.c @@ -262,6 +262,13 @@ int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor) { tensor->offset = ctx->data_off + *offset; tensor->weights_data = ctx->data + tensor->offset; + /* To accurately calculate the bytes used by this tensor in the GGUF + * file, we need to take into account that quantization methods store + * tensors as blocks of N weights. So first of all we need to determine + * the number of padding weights (since the last block may have + * fewer weights stored inside, but still needs to be stored at its full + * length). Then we can do the math to see how many blocks we need, and + * multiply by the block size to obtain the final total size. */ struct gguf_tensor_type_features *tf; tf = gguf_get_tensor_type_features(tensor->type); uint64_t weights_padding = gguf_get_alignment_padding(tf->items_per_block,tensor->num_weights);