Tensors parsing.

2025-12-16 00:18:52 +08:00 · 2023-12-24 17:20:04 +01:00
parent 4ff25fb178
commit 55a15a4230
2 changed files with 136 additions and 18 deletions
--- a/gguf-show.c
+++ b/gguf-show.c
@@ -7,19 +7,10 @@
 #include <errno.h>
 #include <unistd.h>
 #include <string.h>
+#include <assert.h>

 #include "gguf.h"

-typedef struct {
-    int fd;
-    uint8_t *data;  // Memory mapped data.
-    uint64_t size;  // Total file size.
-    struct gguf_header *header;     // GUFF file header info.
-    uint32_t left_kv;               // Number of key-value pairs yet to read.
-    uint32_t left_tensors;          // Number of tensors yet to read.
-    uint64_t off;                   // Offset of the next item to parse.
-} gguf_ctx;
-
 /* Open a GGUF file and return a parsing context. */
 gguf_ctx *gguf_init(char *filename) {
    struct stat sb;
@@ -55,6 +46,8 @@ gguf_ctx *gguf_init(char *filename) {
    ctx->off = sizeof(struct gguf_header);
    ctx->left_kv = ctx->header->metadata_kv_count;
    ctx->left_tensors = ctx->header->tensor_count;
+    ctx->alignment = 32; // Default alignment of GGUF files.
+    ctx->data_off = 0;   // Computed by the first gguf_get_tensor() call.
    return ctx;
 }

@@ -80,15 +73,115 @@ int gguf_get_key(gguf_ctx *ctx, gguf_key *key) {
    key->type = *type;
    ctx->off += 8+str->len+4; // Skip prefixed len + string + type.
    key->val = (void*)(ctx->data+ctx->off);
+
+    /* Update the context with the alignment data, if this is the key. */
+    const char *alignment_key = "general.alignment";
+    if (key->type == GGUF_VALUE_TYPE_UINT32 &&
+        key->namelen == strlen(alignment_key) &&
+        memcmp(alignment_key, key->name, key->namelen) == 0)
+    {
+        ctx->alignment = key->val->uint32;
+    }
    return 1;
 }

+/* Compute the absolute offset of the tensor data section (ctx->data_off).
+ * Must be called when all the key-values are consumed and no tensor info
+ * was read yet: gguf_get_tensor() does so on its first call. The tensor
+ * info entries are scanned without advancing ctx->off. */
+void gguf_set_data_offset(gguf_ctx *ctx) {
+    assert(ctx->left_kv == 0 && ctx->left_tensors == ctx->header->tensor_count);
+
+    uint64_t offset = ctx->off;
+    for (uint32_t j = 0; j < ctx->left_tensors; j++) {
+        struct gguf_string *str = (struct gguf_string*) (ctx->data+offset);
+        offset += 8+str->len;   // Skip prefixed len + string.
+        uint32_t *num_dim = (uint32_t*)(ctx->data+offset);
+        offset += 4+8*(uint64_t)(*num_dim); // Skip num dimensions + dimensions.
+        offset += 4+8;          // Skip tensor type + tensor offset.
+    }
+    /* A zero alignment (corrupted file) would divide by zero below: fall
+     * back to the GGUF default of 32 bytes. */
+    uint64_t align = ctx->alignment ? ctx->alignment : 32;
+    uint64_t padding = (align - (offset % align)) % align;
+    ctx->data_off = offset + padding;
+}
+
+/* Parse the next tensor info entry and return it into 'tensor'.
+ * The function return value is 1 if a tensor was returned, or 0
+ * if there are no more tensors to process in this GGUF file or if
+ * there are still key-value pairs to process before getting into the
+ * tensors section.
+ *
+ * As a side effect, the first call that reaches the tensors section
+ * sets the data offset ctx->data_off. */
+int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor) {
+    if (ctx->left_tensors == 0 || ctx->left_kv != 0) return 0;
+
+    /* We want to return tensor data with offsets relative to the start
+     * of the file, so that the user of the API is able to access tensors
+     * as it iterates over them. To do so, we need to perform a full
+     * scan if this is the first tensor info we are reading. */
+    if (ctx->data_off == 0) gguf_set_data_offset(ctx);
+
+    ctx->left_tensors--;
+    struct gguf_string *str = (struct gguf_string*) (ctx->data+ctx->off);
+    ctx->off += 8+str->len; // Skip prefixed len + string.
+    tensor->namelen = str->len;
+    tensor->name = str->string;
+    uint32_t *num_dim = (uint32_t*) (ctx->data+ctx->off);
+    ctx->off += 4;  // Skip number of dimensions.
+    tensor->ndim = *num_dim;
+    assert(tensor->ndim <= GGUF_TENSOR_MAX_DIM);
+
+    /* Read the dimensions: all the unused dimensions are set to 1. */
+    tensor->num_weights = 1;
+    for (uint32_t j = 0; j < GGUF_TENSOR_MAX_DIM; j++) {
+        if (j < tensor->ndim) {
+            uint64_t *dim = (uint64_t*) (ctx->data+ctx->off);
+            ctx->off += 8; // Skip dimension size.
+            tensor->dim[j] = *dim;
+            tensor->num_weights *= *dim;
+        } else {
+            tensor->dim[j] = 1;
+        }
+    }
+    uint32_t *type = (uint32_t*) (ctx->data+ctx->off);
+    ctx->off += 4;  // Skip tensor type.
+    tensor->type = *type;
+
+    uint64_t *offset = (uint64_t*) (ctx->data+ctx->off);
+    ctx->off += 8;  // Skip tensor offset.
+    tensor->size = 0; // Not computed here: avoid returning garbage.
+    tensor->offset = ctx->data_off + *offset;
+    tensor->weights = ctx->data + tensor->offset;
+    return 1;
+}
+
+const char *gguf_value_name[] = { // Indexed by GGUF value type ID.
+    "uint8", "int8", "uint16", "int16", "uint32", "int32",
+    "float32", "bool", "string", "array", "uint64", "int64",
+    "float64"
+};
+
+const char *gguf_tensor_type_name[] = { // Indexed by tensor type ID.
+    "f32", "f16", "q4_0", "q4_1", "q4_2 deprecated", "q4_3 deprecated",
+    "q5_0", "q5_1", "q8_0", "q8_1", "q2_k", "q3_k", "q4_k", "q5_k",
+    "q6_k", "q8_k", "i8", "i16", "i32", "count"
+};
+
 /* Return the value type name given the type ID. */
 const char *gguf_get_value_type_name(uint32_t type) {
    if (type >= sizeof(gguf_value_name)/sizeof(char*)) return "unknown";
    return gguf_value_name[type];
 }

+/* Map a tensor type ID to its printable name ("unknown" if out of range). */
+const char *gguf_get_tensor_type_name(uint32_t type) {
+    size_t n = sizeof(gguf_tensor_type_name)/sizeof(gguf_tensor_type_name[0]);
+    return type < n ? gguf_tensor_type_name[type] : "unknown";
+}
+
 /* Return the length of the value pointed by 'val' of type 'type'.
 * For the array type the length can't be inferred without consuming
 * it, so 0 is returned. */
@@ -260,5 +353,15 @@ int main(int argc, char **argv) {
        gguf_print_value(ctx,key.type,key.val,0);
        printf("\n");
    }
+
+    /* Print the tensors. uint64_t values are cast to match %llu. */
+    gguf_tensor tensor;
+    while (gguf_get_tensor(ctx,&tensor)) {
+        printf("%s tensor %.*s @%llu, %llu weights\n",
+            gguf_get_tensor_type_name(tensor.type),
+            (int)tensor.namelen, tensor.name,
+            (unsigned long long)tensor.offset,
+            (unsigned long long)tensor.num_weights);
+    }
    return 0;
 }
--- a/gguf.h
+++ b/gguf.h
@@ -65,12 +65,6 @@ enum gguf_value_type {
    GGUF_VALUE_TYPE_ARRAY_END = 101
 };

-const char *gguf_value_name[] = {
-    "uint8", "int8", "uint16", "int16", "uint32", "int32",
-    "float32", "bool", "string", "array", "uint64", "int64",
-    "float64"
-};
-
 // A string in GGUF.
 struct gguf_string {
    // The length of the string, in bytes.
@@ -126,8 +120,29 @@ typedef struct {
    union gguf_value *val;
 } gguf_key;

+#define GGUF_TENSOR_MAX_DIM 8           // Future-proof: actual limit is 4.
 typedef struct {
    const char *name;
    size_t namelen;
-    int type;
-} gguf_value;
+    uint32_t type;                      // Tensor type (enum gguf_tensor_type).
+    uint32_t ndim;                      // Number of dimensions of the tensor.
+    uint64_t dim[GGUF_TENSOR_MAX_DIM];  // Dimensions (e.g. [512, 1024, 1, 1]).
+    uint64_t offset;                    // Offset from start of file.
+    uint64_t size;                      // Total size in bytes.
+    uint64_t num_weights;               // Total number of parameters.
+    uint8_t *weights;                   // Pointer into the mmapped file.
+} gguf_tensor;
+
+typedef struct {
+    int fd;
+    uint8_t *data;  // Memory mapped data.
+    uint64_t size;  // Total file size.
+    struct gguf_header *header;     // GGUF file header info.
+    uint32_t left_kv;               // Number of key-value pairs yet to read.
+    uint32_t left_tensors;          // Number of tensors yet to read.
+    uint64_t off;                   // Offset of the next item to parse.
+    uint64_t data_off;              // Offset of tensor data section. Set by
+                                    // the first gguf_get_tensor() call that
+                                    // reaches the tensors. Initially 0.
+    uint64_t alignment;             // File data alignment. Default: 32 bytes.
+} gguf_ctx;