GGUF parsing, initial design and functionalities.

2025-12-16 00:18:52 +08:00 · 2023-12-24 10:36:26 +01:00
commit b47eaca8d1
4 changed files with 415 additions and 0 deletions
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/7
+++ b/7
@@ -0,0 +1,7 @@
+# Build the gguf-show utility from its single C source file.
+# 'all' and 'clean' do not produce files with those names: declare them
+# phony so that a stray file called 'all' or 'clean' can't disable them.
+.PHONY: all clean
+
+all: gguf-show
+
+gguf-show: gguf-show.c gguf.h
+	$(CC) gguf-show.c -g -ggdb -Wall -W -pedantic -O2 -o gguf-show
+
+clean:
+	rm -rf gguf-show
--- a/gguf-show.c
+++ b/gguf-show.c
@@ -0,0 +1,254 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "gguf.h"
+
+/* Parsing context of one memory mapped GGUF file. It is created by
+ * gguf_init() and released by gguf_end(). */
+typedef struct {
+    int fd;                         // Descriptor of the open file.
+    uint8_t *data;                  // Memory mapped data.
+    uint64_t size;                  // Total file size.
+    struct gguf_header *header;     // GGUF file header info.
+    /* The header stores both counters as 64 bit integers: they are kept
+     * 64 bit here as well, so that large counts are not truncated. */
+    uint64_t left_kv;               // Number of key-value pairs yet to read.
+    uint64_t left_tensors;          // Number of tensors yet to read.
+    uint64_t off;                   // Offset of the next item to parse.
+} gguf_ctx;
+
+/* Open a GGUF file and return a parsing context.
+ *
+ * The file is opened read-only and mapped in memory. On success a newly
+ * allocated context is returned: the caller releases it with gguf_end().
+ * On error NULL is returned, and the descriptor and the mapping acquired
+ * so far are released. If the file is too small to contain the header, or
+ * does not start with the "GGUF" magic, errno is set to EINVAL; for the
+ * other failures errno is the one left by the failing call. */
+gguf_ctx *gguf_init(char *filename) {
+    struct stat sb;
+    void *mapped = MAP_FAILED;
+    gguf_ctx *ctx = NULL;
+    int saved_errno;
+
+    int fd = open(filename,O_RDONLY);
+    if (fd == -1) return NULL;
+    if (fstat(fd,&sb) == -1) goto err;
+
+    /* Minimal sanity check, part one: refuse files that can't even hold
+     * the header. Doing it before mmap() also means we never try to
+     * map zero bytes. */
+    if (sb.st_size < (off_t)sizeof(struct gguf_header)) {
+        errno = EINVAL;
+        goto err;
+    }
+
+    /* Now that we have an open file and its total size, let's
+     * mmap it. */
+    mapped = mmap(NULL,sb.st_size,PROT_READ,MAP_PRIVATE,fd,0);
+    if (mapped == MAP_FAILED) goto err;
+
+    /* Minimal sanity check, part two: the magic. */
+    if (memcmp(mapped,"GGUF",4) != 0) {
+        errno = EINVAL;
+        goto err;
+    }
+
+    /* Mapping successful. We can create our context object. */
+    ctx = malloc(sizeof(*ctx));
+    if (ctx == NULL) goto err;
+    ctx->fd = fd;
+    ctx->data = mapped;
+    ctx->header = mapped;
+    ctx->size = sb.st_size;
+    ctx->off = sizeof(struct gguf_header);
+    ctx->left_kv = ctx->header->metadata_kv_count;
+    ctx->left_tensors = ctx->header->tensor_count;
+    return ctx;
+
+err:
+    /* munmap() and close() may change errno: preserve the value that
+     * describes the real failure for the caller. */
+    saved_errno = errno;
+    if (mapped != MAP_FAILED) munmap(mapped,sb.st_size);
+    close(fd);
+    errno = saved_errno;
+    return NULL;
+}
+
+/* Release a context obtained from gguf_init(): the file is unmapped,
+ * its descriptor is closed and the context itself is freed.
+ * Passing NULL is allowed and does nothing. */
+void gguf_end(gguf_ctx *ctx) {
+    if (ctx != NULL) {
+        munmap(ctx->data,ctx->size);
+        close(ctx->fd);
+        free(ctx);
+    }
+}
+
+/* Parse the next key. Returns key information into 'key'.
+ * The function return value is 1 if a key was returned, or 0
+ * if there are no more keys to process in this GGUF file. 0 is also
+ * returned, without consuming anything, if the mapped data is too short
+ * to contain the key (truncated or corrupted file).
+ *
+ * A key is stored as an 8 bytes name length, the name itself (not null
+ * terminated) and a 4 bytes value type. On success 'key->name' and
+ * 'key->val' point inside the mapped file, and the context offset is left
+ * at the start of the value. Integers are read in host byte order. */
+int gguf_get_key(gguf_ctx *ctx, gguf_key *key) {
+    if (ctx->left_kv == 0) return 0;
+
+    /* The file content is untrusted: verify that length prefix, name and
+     * type are all inside the mapping. Only subtractions from known-good
+     * quantities are used, so a huge length can't overflow the check. */
+    if (ctx->off > ctx->size || ctx->size - ctx->off < 8) return 0;
+
+    /* Items are not guaranteed to be aligned inside the file: copy the
+     * integers out instead of dereferencing casted pointers. */
+    uint64_t namelen;
+    memcpy(&namelen,ctx->data+ctx->off,8);
+
+    uint64_t avail = ctx->size - ctx->off - 8; // Bytes after the prefix.
+    if (namelen > avail || avail - namelen < 4) return 0;
+
+    const uint8_t *name = ctx->data+ctx->off+8;
+    uint32_t type;
+    memcpy(&type,name+namelen,4);
+
+    ctx->left_kv--;
+    key->namelen = namelen;
+    key->name = (const char*)name;
+    key->type = type;
+    ctx->off += 8+namelen+4; // Skip prefixed len + string + type.
+    key->val = (void*)(ctx->data+ctx->off);
+    return 1;
+}
+
+/* Map a value type ID to its human readable name, as found in the
+ * gguf_value_name table. IDs outside the table yield "unknown". */
+const char *gguf_get_value_type_name(uint32_t type) {
+    size_t known = sizeof(gguf_value_name)/sizeof(gguf_value_name[0]);
+    return type < known ? gguf_value_name[type] : "unknown";
+}
+
+/* Number of bytes occupied by the value 'val' of type 'type'.
+ * Fixed size types have a constant length, strings take their 8 bytes
+ * length prefix plus the string bytes. Arrays can't be measured without
+ * consuming them, so 0 is returned for them, as for any type ID that
+ * is not listed here. */
+uint64_t gguf_value_len(uint32_t type, union gguf_value *val) {
+    switch(type) {
+    case GGUF_VALUE_TYPE_UINT8:
+    case GGUF_VALUE_TYPE_INT8:
+    case GGUF_VALUE_TYPE_BOOL:
+        return 1;
+    case GGUF_VALUE_TYPE_UINT16:
+    case GGUF_VALUE_TYPE_INT16:
+        return 2;
+    case GGUF_VALUE_TYPE_UINT32:
+    case GGUF_VALUE_TYPE_INT32:
+    case GGUF_VALUE_TYPE_FLOAT32:
+        return 4;
+    case GGUF_VALUE_TYPE_UINT64:
+    case GGUF_VALUE_TYPE_INT64:
+    case GGUF_VALUE_TYPE_FLOAT64:
+        return 8;
+    case GGUF_VALUE_TYPE_STRING:
+        return 8+val->string.len;
+    default:
+        return 0;
+    }
+}
+
+/* This function can be called after gguf_get_key(), since the context
+ * offset will be in the position of a value.
+ *
+ * The function will process the value, including nested values (in the
+ * case of an array value), and for each value will call the specified
+ * callback. As a side effect of calling this function, the context offset
+ * is advanced to consume the value.
+ *
+ * If the callback is set to NULL, no callback will be called,
+ * but the value will be consumed, so that it will be possible
+ * to call gguf_get_key() or gguf_get_tensor() to continue reading
+ * the file.
+ *
+ * When the callback is called, it gets the argument 'privdata' and 'in_array'
+ * as passed to this function. This is useful if the callback needs
+ * to take state (for pretty printing or alike) and to know if the
+ * elements it is processing belong to an array.
+ *
+ * The value of 'in_array' is the 1-based index of the element being
+ * processed, and 'array_len' is the number of elements of the array the
+ * element belongs to.
+ *
+ * In the case of arrays, callbacks are also called with the special
+ * type ARRAY_START / ARRAY_END at the start/end of the array
+ * processing: for ARRAY_START 'val' points to the array header, for
+ * ARRAY_END it is NULL. In both calls the array length is passed as
+ * 'array_len'. */
+void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
+                        void *privdata, uint64_t in_array, uint64_t array_len,
+                        void(*callback)(void *privdata, uint32_t type,
+                                     union gguf_value *val, uint64_t in_array,
+                                     uint64_t array_len))
+{
+    if (type == GGUF_VALUE_TYPE_ARRAY) {
+        uint32_t etype = val->array.type; // Elements type.
+        uint64_t len = val->array.len;    // Number of elements.
+        ctx->off += 4+8; // Skip elements type / array length.
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
+        for (uint64_t j = 0; j < len; j++) {
+            /* A corrupted length would make us walk past the end of the
+             * mapping: stop as soon as the offset is outside the file. */
+            if (ctx->off >= ctx->size) break;
+            val = (union gguf_value*)(ctx->data+ctx->off);
+            gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
+            /* As a side effect of calling gguf_do_with_value() ctx->off
+             * will be updated, so 'val' will be set to the next element. */
+        }
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
+    } else {
+        if (callback) callback(privdata,type,val,in_array,array_len);
+        ctx->off += gguf_value_len(type,val);
+    }
+}
+
+/* Options controlling how values are printed. In the code visible here
+ * nothing reads this structure yet: gguf_print_value_callback() ignores
+ * its 'privdata' argument. */
+struct gguf_print_options {
+    uint64_t max_array_items;       // Don't print more than N items.
+};
+
+/* Print a GGUF value to standard output. 'privdata' is meant to carry a
+ * gguf_print_options pointer and may be NULL: it is currently ignored.
+ *
+ * 'type' selects which member of 'val' is printed. The special types
+ * ARRAY_START / ARRAY_END print the opening bracket (with the number of
+ * items, taken from 'array_len') and the closing bracket, without
+ * reading 'val'. When 'in_array' is non zero and differs from 'array_len'
+ * a ", " separator is printed after the item.
+ *
+ * The function is designed to be used as a callback of gguf_do_with_value(). */
+void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
+    (void) privdata;
+
+    /* 64 bit integers are casted to (unsigned) long long before being
+     * passed to printf(): uint64_t / int64_t are not guaranteed to be the
+     * types that %llu / %lld expect. */
+    switch (type) {
+        case GGUF_VALUE_TYPE_ARRAY_START:
+            printf("[(%llu items)",(unsigned long long)array_len); break;
+        case GGUF_VALUE_TYPE_ARRAY_END:
+            printf("]"); break;
+        case GGUF_VALUE_TYPE_UINT8:
+            printf("%u", val->uint8); break;
+        case GGUF_VALUE_TYPE_INT8:
+            printf("%d", val->int8); break;
+        case GGUF_VALUE_TYPE_UINT16:
+            printf("%u", val->uint16); break;
+        case GGUF_VALUE_TYPE_INT16:
+            printf("%d", val->int16); break;
+        case GGUF_VALUE_TYPE_UINT32:
+            printf("%u", val->uint32); break;
+        case GGUF_VALUE_TYPE_INT32:
+            printf("%d", val->int32); break;
+        case GGUF_VALUE_TYPE_FLOAT32:
+            printf("%f", val->float32); break;
+        case GGUF_VALUE_TYPE_BOOL:
+            if (val->boolval == 0 || val->boolval == 1)
+                printf("%s", val->boolval ? "true" : "false");
+            else
+                printf("Invalid boolean value %d", val->boolval);
+            break;
+        case GGUF_VALUE_TYPE_STRING:
+            /* Strings are not null terminated: print exactly 'len' bytes. */
+            printf("%.*s", (int)val->string.len, val->string.string); break;
+        case GGUF_VALUE_TYPE_UINT64:
+            printf("%llu", (unsigned long long)val->uint64); break;
+        case GGUF_VALUE_TYPE_INT64:
+            printf("%lld", (long long)val->int64); break;
+        case GGUF_VALUE_TYPE_FLOAT64:
+            printf("%lf", val->float64); break;
+        default:
+            printf("Unknown type\n");
+            break;
+    }
+    if (in_array && in_array != array_len) printf(", ");
+}
+
+/* Print the value 'val' of type 'type' found at the current context
+ * offset, walking arrays as well. The value is consumed: once the
+ * function returns, the context points to the item that follows it in
+ * the GGUF file. */
+void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val) {
+    const uint64_t not_in_array = 0;    // Top level value: no element index.
+    const uint64_t no_array_len = 0;    // ... and no enclosing array length.
+    gguf_do_with_value(ctx,type,val,NULL,not_in_array,no_array_len,
+                       gguf_print_value_callback);
+}
+
+/* Entry point: print the header summary and every metadata key-value
+ * pair of the GGUF file given as the only command line argument.
+ * Exits with status 1 on wrong usage or if the file can't be opened. */
+int main(int argc, char **argv) {
+    if (argc != 2) {
+        printf("Usage: %s <filename>\n",argv[0]);
+        exit(1);
+    }
+    gguf_ctx *ctx = gguf_init(argv[1]);
+    if (ctx == NULL) {
+        perror("Opening GGUF file");
+        exit(1);
+    }
+
+    /* Show general information about the neural network. */
+    printf("%s (ver %d): %llu key-value pairs, %llu tensors\n",
+        argv[1],
+        (int)ctx->header->version,
+        (unsigned long long)ctx->header->metadata_kv_count,
+        (unsigned long long)ctx->header->tensor_count);
+
+    /* Show all the key-value pairs. */
+    gguf_key key;
+    while (gguf_get_key(ctx,&key)) {
+        printf("%.*s: [%s] ", (int)key.namelen, key.name, gguf_get_value_type_name(key.type));
+        /* Printing the value also consumes it, so that the next
+         * gguf_get_key() call finds the following key. */
+        gguf_print_value(ctx,key.type,key.val);
+        printf("\n");
+    }
+
+    /* Unmap the file, close it and free the context. */
+    gguf_end(ctx);
+    return 0;
+}
--- a/gguf.h
+++ b/gguf.h
@@ -0,0 +1,133 @@
+/* This code is adapted from https://github.com/ggerganov/ggml/
+ * The changes are copyright (C) 2024 Salvatore Sanfilippo <antirez@gmail.com>
+ * See LICENSE for licensing info. */
+
+#include <stdint.h>
+
+/* Tensor type IDs. Every ID is spelled out explicitly. IDs 4 and 5 are
+ * intentionally left unassigned, see below. */
+enum gguf_tensor_type {
+    GUFF_TYPE_F32   = 0,
+    GUFF_TYPE_F16   = 1,
+    GUFF_TYPE_Q4_0  = 2,
+    GUFF_TYPE_Q4_1  = 3,
+    /* 4 was GUFF_TYPE_Q4_2 and 5 was GUFF_TYPE_Q4_3:
+     * support for both has been removed. */
+    GUFF_TYPE_Q5_0  = 6,
+    GUFF_TYPE_Q5_1  = 7,
+    GUFF_TYPE_Q8_0  = 8,
+    GUFF_TYPE_Q8_1  = 9,
+    /* k-quantizations. */
+    GUFF_TYPE_Q2_K  = 10,
+    GUFF_TYPE_Q3_K  = 11,
+    GUFF_TYPE_Q4_K  = 12,
+    GUFF_TYPE_Q5_K  = 13,
+    GUFF_TYPE_Q6_K  = 14,
+    GUFF_TYPE_Q8_K  = 15,
+    /* Plain integer types. */
+    GUFF_TYPE_I8    = 16,
+    GUFF_TYPE_I16   = 17,
+    GUFF_TYPE_I32   = 18,
+    /* One past the highest type ID. */
+    GUFF_TYPE_COUNT = 19,
+};
+
+/* Type IDs of the metadata values. Multi-byte integers are stored in
+ * little-endian order. */
+enum gguf_value_type {
+    GGUF_VALUE_TYPE_UINT8   = 0,    /* 8-bit unsigned integer. */
+    GGUF_VALUE_TYPE_INT8    = 1,    /* 8-bit signed integer. */
+    GGUF_VALUE_TYPE_UINT16  = 2,    /* 16-bit unsigned integer. */
+    GGUF_VALUE_TYPE_INT16   = 3,    /* 16-bit signed integer. */
+    GGUF_VALUE_TYPE_UINT32  = 4,    /* 32-bit unsigned integer. */
+    GGUF_VALUE_TYPE_INT32   = 5,    /* 32-bit signed integer. */
+    GGUF_VALUE_TYPE_FLOAT32 = 6,    /* 32-bit IEEE754 floating point. */
+
+    /* Boolean: 1-byte value where 0 is false and 1 is true. Anything
+     * else is invalid, and should be treated as either the model being
+     * invalid or the reader being buggy. */
+    GGUF_VALUE_TYPE_BOOL    = 7,
+
+    /* UTF-8 non-null-terminated string, with length prepended. */
+    GGUF_VALUE_TYPE_STRING  = 8,
+
+    /* Array of other values, with the length and type prepended. Arrays
+     * can be nested, and the length of the array is the number of
+     * elements in the array, not the number of bytes. */
+    GGUF_VALUE_TYPE_ARRAY   = 9,
+
+    GGUF_VALUE_TYPE_UINT64  = 10,   /* 64-bit unsigned integer. */
+    GGUF_VALUE_TYPE_INT64   = 11,   /* 64-bit signed integer. */
+    GGUF_VALUE_TYPE_FLOAT64 = 12,   /* 64-bit IEEE754 floating point. */
+
+    /* Special values used by the callbacks of gguf_do_with_value(). */
+    GGUF_VALUE_TYPE_ARRAY_START = 100,
+    GGUF_VALUE_TYPE_ARRAY_END   = 101
+};
+
+/* Human readable names of the value types, indexed by type ID (0-12).
+ * The table is 'static' because it is defined in a header: with external
+ * linkage, every file including the header would define the same symbol.
+ * Both the strings and the table itself are read-only. */
+static const char *const gguf_value_name[] = {
+    "uint8", "int8", "uint16", "int16", "uint32", "int32",
+    "float32", "bool", "string", "array", "uint64", "int64",
+    "float64"
+};
+
+// A string in GGUF: an 8 bytes length immediately followed by the bytes
+// of the string. The structure is used to access strings in place, so the
+// order and the size of its fields must not be changed.
+struct gguf_string {
+    // The length of the string, in bytes.
+    uint64_t len;
+    // The string as a UTF-8 non-null-terminated string.
+    // Flexible array member: the bytes follow 'len' with no gap.
+    char string[];
+};
+
+// Union of possible values. The member to read is selected by the value
+// type ID (see enum gguf_value_type).
+union gguf_value {
+    uint8_t uint8;
+    int8_t int8;
+    uint16_t uint16;
+    int16_t int16;
+    uint32_t uint32;
+    int32_t int32;
+    float float32;
+    uint64_t uint64;
+    int64_t int64;
+    double float64;
+    uint8_t boolval;
+    struct gguf_string string;
+    // Header of an array value.
+    struct {
+        // Any value type is valid, including arrays.
+        uint32_t type;
+        // Number of elements, not bytes
+        uint64_t len;
+        // The array of values follow...
+    // Packed so that 'len' comes right after the 4 bytes of 'type', with
+    // no padding in between: the header is 4+8 bytes long.
+    } __attribute__((packed)) array;
+};
+
+// Header found at the very start of a GGUF file. The fields are listed
+// in the same order they have in the file.
+struct gguf_header {
+    // Magic number to announce that this is a GGUF file.
+    // Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
+    uint32_t magic;
+    // The version of the format implemented.
+    // Must be `3` for version described in this spec.
+    uint32_t version;
+    // The number of tensors in the file.
+    // This is explicit, instead of being included in the metadata, to ensure
+    // it is always present for loading the tensors.
+    uint64_t tensor_count;
+    // The number of metadata key-value pairs.
+    uint64_t metadata_kv_count;
+};
+
+/* Key representation in this library API. */
+typedef struct {
+    const char *name;       // Key name, NOT null terminated.
+    size_t namelen;         // Length of 'name' in bytes.
+    uint32_t type;          // Value type ID (see enum gguf_value_type).
+    union gguf_value *val;  // The value associated with the key.
+} gguf_key;
+
+/* NOTE(review): this type is not referenced by the code visible in this
+ * change, and its name is the same as the tag of 'union gguf_value'
+ * (legal in C, since tags live in a separate namespace, but confusing).
+ * Its fields mirror the first three fields of gguf_key; the intended use
+ * is not shown here -- TODO confirm or remove. */
+typedef struct {
+    const char *name;
+    size_t namelen;
+    int type;
+} gguf_value;