commit b47eaca8d162bcd69822405cb7cef54fe4513518
Author: antirez
Date:   Sun Dec 24 10:36:26 2023 +0100

    GGUF parsing, initial design and functionalities.

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..fb7ff0c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Georgi Gerganov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ebccda2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,7 @@
+all: gguf-show
+
+gguf-show: gguf-show.c gguf.h
+	$(CC) gguf-show.c -g -ggdb -Wall -W -pedantic -O2 -o gguf-show
+
+clean:
+	rm -rf gguf-show
diff --git a/gguf-show.c b/gguf-show.c
new file mode 100644
index 0000000..372b56f
--- /dev/null
+++ b/gguf-show.c
@@ -0,0 +1,318 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include "gguf.h"
+
+typedef struct {
+    int fd;
+    uint8_t *data;  // Memory mapped data.
+    uint64_t size;  // Total file size.
+    struct gguf_header *header; // GGUF file header info.
+    uint64_t left_kv;       // Number of key-value pairs yet to read.
+    uint64_t left_tensors;  // Number of tensors yet to read.
+    uint64_t off;           // Offset of the next item to parse.
+} gguf_ctx;
+
+/* Return 1 if 'len' bytes starting at the current context offset are
+ * inside the mapped file, 0 otherwise. The comparison is arranged so
+ * that it can't overflow, whatever 'len' is. */
+static int gguf_has_bytes(gguf_ctx *ctx, uint64_t len) {
+    return ctx->off <= ctx->size && len <= ctx->size - ctx->off;
+}
+
+/* Open a GGUF file and return a parsing context. On error NULL is
+ * returned, errno is set, and no resource is left allocated. The
+ * context must be released with gguf_end(). */
+gguf_ctx *gguf_init(char *filename) {
+    struct stat sb;
+    void *mapped = MAP_FAILED;
+    gguf_ctx *ctx;
+    int saved_errno;
+
+    int fd = open(filename,O_RDONLY);
+    if (fd == -1) return NULL;
+    if (fstat(fd,&sb) == -1) goto error;
+
+    /* Minimal sanity check: a file shorter than the header can't be
+     * a valid GGUF file, and we must not read past its end. */
+    if (sb.st_size < (off_t)sizeof(struct gguf_header)) {
+        errno = EINVAL;
+        goto error;
+    }
+
+    /* Now that we have an open file and its total size, let's
+     * mmap it. */
+    mapped = mmap(0,sb.st_size,PROT_READ,MAP_PRIVATE,fd,0);
+    if (mapped == MAP_FAILED) goto error;
+
+    if (memcmp(mapped,"GGUF",4) != 0) {
+        errno = EINVAL;
+        goto error;
+    }
+
+    /* Mapping successful. We can create our context object. */
+    ctx = malloc(sizeof(*ctx));
+    if (ctx == NULL) goto error;
+    ctx->fd = fd;
+    ctx->data = mapped;
+    ctx->header = mapped;
+    ctx->size = sb.st_size;
+    ctx->off = sizeof(struct gguf_header);
+    ctx->left_kv = ctx->header->metadata_kv_count;
+    ctx->left_tensors = ctx->header->tensor_count;
+    return ctx;
+
+error:
+    /* Release what was acquired so far, preserving the errno that
+     * describes the failure. */
+    saved_errno = errno;
+    if (mapped != MAP_FAILED) munmap(mapped,sb.st_size);
+    close(fd);
+    errno = saved_errno;
+    return NULL;
+}
+
+/* Cleanup needed after gguf_init(), to terminate the context
+ * and cleanup resources. */
+void gguf_end(gguf_ctx *ctx) {
+    if (ctx == NULL) return;
+    munmap(ctx->data,ctx->size);
+    close(ctx->fd);
+    free(ctx);
+}
+
+/* Parse the next key. Returns key information into 'key'.
+ * The function return value is 1 if a key was returned, or 0
+ * if there are no more keys to process in this GGUF file. Zero is
+ * also returned if the file is truncated in the middle of a key. */
+int gguf_get_key(gguf_ctx *ctx, gguf_key *key) {
+    if (ctx->left_kv == 0) return 0;
+
+    /* A key is: 8 bytes name length, the name, 4 bytes value type.
+     * Validate everything against the file size before reading, and
+     * use memcpy() since the fields may be unaligned. */
+    if (!gguf_has_bytes(ctx,8+4)) return 0;
+    uint64_t namelen;
+    memcpy(&namelen,ctx->data+ctx->off,sizeof(namelen));
+    if (namelen > ctx->size - ctx->off - (8+4)) return 0;
+
+    const uint8_t *name = ctx->data+ctx->off+8;
+    uint32_t type;
+    memcpy(&type,name+namelen,sizeof(type));
+
+    ctx->left_kv--;
+    key->name = (const char*)name;
+    key->namelen = namelen;
+    key->type = type;
+    ctx->off += 8+namelen+4; // Skip prefixed len + string + type.
+    key->val = (void*)(ctx->data+ctx->off);
+    return 1;
+}
+
+/* Return the value type name given the type ID. */
+const char *gguf_get_value_type_name(uint32_t type) {
+    if (type >= sizeof(gguf_value_name)/sizeof(char*)) return "unknown";
+    return gguf_value_name[type];
+}
+
+/* Return the length of the value pointed by 'val' of type 'type'.
+ * For the array type the length can't be inferred without consuming
+ * it, so 0 is returned. Zero is returned for unknown types as well. */
+uint64_t gguf_value_len(uint32_t type, union gguf_value *val) {
+    uint64_t valuelen = 0;
+    switch(type) {
+    case GGUF_VALUE_TYPE_BOOL:
+    case GGUF_VALUE_TYPE_UINT8:
+    case GGUF_VALUE_TYPE_INT8:
+        valuelen = 1; break;
+    case GGUF_VALUE_TYPE_UINT16:
+    case GGUF_VALUE_TYPE_INT16:
+        valuelen = 2; break;
+    case GGUF_VALUE_TYPE_UINT32:
+    case GGUF_VALUE_TYPE_INT32:
+    case GGUF_VALUE_TYPE_FLOAT32:
+        valuelen = 4; break;
+    case GGUF_VALUE_TYPE_UINT64:
+    case GGUF_VALUE_TYPE_INT64:
+    case GGUF_VALUE_TYPE_FLOAT64:
+        valuelen = 8; break;
+    case GGUF_VALUE_TYPE_STRING:
+        valuelen = 8+val->string.len; break;
+    }
+    return valuelen;
+}
+
+/* This function can be called after gguf_get_key(), since the context
+ * offset will be in the position of a value.
+ *
+ * The function will process the value, including nested values (in the
+ * case of an array value), and for each value will call the specified
+ * callback. As a side effect of calling this function, the context offset
+ * is advanced to consume the value.
+ *
+ * If the callback is set to NULL, no callback will be called,
+ * but the value will be consumed, so that it will be possible
+ * to call gguf_get_key() or gguf_get_tensor() to continue reading
+ * the file.
+ *
+ * When the callback is called, it gets the argument 'privdata' and 'in_array'
+ * as passed to this function. This is useful if the callback needs
+ * to take state (for pretty printing or alike) and to know if the
+ * elements it is processing belong to an array.
+ *
+ * The value of 'in_array' is the 1-based index of the element being
+ * processed.
+ *
+ * In the case of arrays, callbacks are also called with the special
+ * type ARRAY_START / ARRAY_END at the start/end of the array
+ * processing.
+ *
+ * If a value extends past the end of the file, processing stops and the
+ * context offset is moved to the end of the file, so that the following
+ * gguf_get_key() calls will return 0. */
+void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
+                        void *privdata, uint64_t in_array, uint64_t array_len,
+                        void(*callback)(void *privdata, uint32_t type,
+                                     union gguf_value *val, uint64_t in_array,
+                                     uint64_t array_len))
+{
+    if (type == GGUF_VALUE_TYPE_ARRAY) {
+        if (!gguf_has_bytes(ctx,4+8)) {
+            ctx->off = ctx->size;
+            return;
+        }
+        uint32_t etype = val->array.type;   // Elements type.
+        uint64_t len = val->array.len;      // Number of elements.
+        ctx->off += 4+8; // Skip elements type / array length.
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
+        for (uint64_t j = 0; j < len; j++) {
+            uint64_t start = ctx->off;
+            val = (union gguf_value*)(ctx->data+ctx->off);
+            gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
+            /* As a side effect of calling gguf_do_with_value() ctx->off
+             * will be updated, so 'val' will be set to the next element.
+             * If the offset did not move (unknown element type, or end
+             * of file reached) stop, or a bogus 'len' would make us
+             * loop for a very long time. */
+            if (ctx->off == start) break;
+        }
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
+    } else {
+        /* Check the value is fully inside the file before the callback
+         * reads it. For strings the length prefix must be validated
+         * first, since gguf_value_len() dereferences it. */
+        if (type == GGUF_VALUE_TYPE_STRING &&
+            (!gguf_has_bytes(ctx,8) ||
+             val->string.len > ctx->size - ctx->off - 8))
+        {
+            ctx->off = ctx->size;
+            return;
+        }
+        uint64_t valuelen = gguf_value_len(type,val);
+        if (!gguf_has_bytes(ctx,valuelen)) {
+            ctx->off = ctx->size;
+            return;
+        }
+        if (callback) callback(privdata,type,val,in_array,array_len);
+        ctx->off += valuelen;
+    }
+}
+
+struct gguf_print_options {
+    uint64_t max_array_items;       // Don't print more than N items.
+};
+
+/* Print a GGUF value. 'privdata' is reserved for passing a
+ * gguf_print_options pointer and is currently ignored, so it may be NULL.
+ *
+ * The function is designed to be used as a callback of gguf_do_with_value(). */
+void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
+    (void) privdata;
+
+    switch (type) {
+    case GGUF_VALUE_TYPE_ARRAY_START:
+        /* No separator after the opening bracket: 'in_array' here is
+         * the position of the array itself, not of an element. */
+        printf("[(%llu items)",(unsigned long long)array_len);
+        return;
+    case GGUF_VALUE_TYPE_ARRAY_END:
+        printf("]"); break;
+    case GGUF_VALUE_TYPE_UINT8:
+        printf("%u", val->uint8); break;
+    case GGUF_VALUE_TYPE_INT8:
+        printf("%d", val->int8); break;
+    case GGUF_VALUE_TYPE_UINT16:
+        printf("%u", val->uint16); break;
+    case GGUF_VALUE_TYPE_INT16:
+        printf("%d", val->int16); break;
+    case GGUF_VALUE_TYPE_UINT32:
+        printf("%u", val->uint32); break;
+    case GGUF_VALUE_TYPE_INT32:
+        printf("%d", val->int32); break;
+    case GGUF_VALUE_TYPE_FLOAT32:
+        printf("%f", val->float32); break;
+    case GGUF_VALUE_TYPE_BOOL:
+        if (val->boolval == 0 || val->boolval == 1)
+            printf("%s", val->boolval ? "true" : "false");
+        else
+            printf("Invalid boolean value %d", val->boolval);
+        break;
+    case GGUF_VALUE_TYPE_STRING:
+        printf("%.*s", (int)val->string.len, val->string.string); break;
+    case GGUF_VALUE_TYPE_UINT64:
+        printf("%llu", (unsigned long long)val->uint64); break;
+    case GGUF_VALUE_TYPE_INT64:
+        printf("%lld", (long long)val->int64); break;
+    case GGUF_VALUE_TYPE_FLOAT64:
+        printf("%lf", val->float64); break;
+    default:
+        printf("Unknown type\n");
+        break;
+    }
+    if (in_array && in_array != array_len) printf(", ");
+}
+
+/* Print the current value, including arrays. As a side effect
+ * the value will be consumed from the context, that will now point
+ * to the next item in the GGUF file. */
+void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val) {
+    gguf_do_with_value(ctx,type,val,NULL,0,0,gguf_print_value_callback);
+}
+
+int main(int argc, char **argv) {
+    if (argc != 2) {
+        printf("Usage: %s <filename>\n",argv[0]);
+        exit(1);
+    }
+    gguf_ctx *ctx = gguf_init(argv[1]);
+    if (ctx == NULL) {
+        perror("Opening GGUF file");
+        exit(1);
+    }
+
+    /* Show general information about the neural network. */
+    printf("%s (ver %d): %llu key-value pairs, %llu tensors\n",
+        argv[1],
+        (int)ctx->header->version,
+        (unsigned long long)ctx->header->metadata_kv_count,
+        (unsigned long long)ctx->header->tensor_count);
+
+    /* Show all the key-value pairs. */
+    gguf_key key;
+    while (gguf_get_key(ctx,&key)) {
+        printf("%.*s: [%s] ", (int)key.namelen, key.name, gguf_get_value_type_name(key.type));
+        gguf_print_value(ctx,key.type,key.val);
+        printf("\n");
+    }
+    gguf_end(ctx);
+    return 0;
+}
diff --git a/gguf.h b/gguf.h
new file mode 100644
index 0000000..35c141d
--- /dev/null
+++ b/gguf.h
@@ -0,0 +1,142 @@
+/* This code is adapted from https://github.com/ggerganov/ggml/
+ * The changes are copyright (C) 2024 Salvatore Sanfilippo
+ * See LICENSE for licensing info. */
+
+#ifndef GGUF_H
+#define GGUF_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+enum gguf_tensor_type {
+    GUFF_TYPE_F32  = 0,
+    GUFF_TYPE_F16  = 1,
+    GUFF_TYPE_Q4_0 = 2,
+    GUFF_TYPE_Q4_1 = 3,
+    // GUFF_TYPE_Q4_2 = 4, support has been removed
+    // GUFF_TYPE_Q4_3 (5) support has been removed
+    GUFF_TYPE_Q5_0 = 6,
+    GUFF_TYPE_Q5_1 = 7,
+    GUFF_TYPE_Q8_0 = 8,
+    GUFF_TYPE_Q8_1 = 9,
+    // k-quantizations
+    GUFF_TYPE_Q2_K = 10,
+    GUFF_TYPE_Q3_K = 11,
+    GUFF_TYPE_Q4_K = 12,
+    GUFF_TYPE_Q5_K = 13,
+    GUFF_TYPE_Q6_K = 14,
+    GUFF_TYPE_Q8_K = 15,
+    GUFF_TYPE_I8,
+    GUFF_TYPE_I16,
+    GUFF_TYPE_I32,
+    GUFF_TYPE_COUNT,
+};
+
+enum gguf_value_type {
+    // The value is a 8-bit unsigned integer.
+    GGUF_VALUE_TYPE_UINT8 = 0,
+    // The value is a 8-bit signed integer.
+    GGUF_VALUE_TYPE_INT8 = 1,
+    // The value is a 16-bit unsigned little-endian integer.
+    GGUF_VALUE_TYPE_UINT16 = 2,
+    // The value is a 16-bit signed little-endian integer.
+    GGUF_VALUE_TYPE_INT16 = 3,
+    // The value is a 32-bit unsigned little-endian integer.
+    GGUF_VALUE_TYPE_UINT32 = 4,
+    // The value is a 32-bit signed little-endian integer.
+    GGUF_VALUE_TYPE_INT32 = 5,
+    // The value is a 32-bit IEEE754 floating point number.
+    GGUF_VALUE_TYPE_FLOAT32 = 6,
+    // The value is a boolean.
+    // 1-byte value where 0 is false and 1 is true.
+    // Anything else is invalid, and should be treated as either the model
+    // being invalid or the reader being buggy.
+    GGUF_VALUE_TYPE_BOOL = 7,
+    // The value is a UTF-8 non-null-terminated string, with length prepended.
+    GGUF_VALUE_TYPE_STRING = 8,
+    // The value is an array of other values, with the length and type
+    // prepended. Arrays can be nested, and the length of the array is the
+    // number of elements in the array, not the number of bytes.
+    GGUF_VALUE_TYPE_ARRAY = 9,
+    // The value is a 64-bit unsigned little-endian integer.
+    GGUF_VALUE_TYPE_UINT64 = 10,
+    // The value is a 64-bit signed little-endian integer.
+    GGUF_VALUE_TYPE_INT64 = 11,
+    // The value is a 64-bit IEEE754 floating point number.
+    GGUF_VALUE_TYPE_FLOAT64 = 12,
+    // Special values used by the callbacks of gguf_do_with_value().
+    GGUF_VALUE_TYPE_ARRAY_START = 100,
+    GGUF_VALUE_TYPE_ARRAY_END = 101
+};
+
+/* Names of the value types, indexed by enum gguf_value_type. Declared
+ * static so that including this header from several files does not
+ * produce multiple definitions. */
+static const char *gguf_value_name[] = {
+    "uint8", "int8", "uint16", "int16", "uint32", "int32",
+    "float32", "bool", "string", "array", "uint64", "int64",
+    "float64"
+};
+
+// A string in GGUF.
+struct gguf_string {
+    // The length of the string, in bytes.
+    uint64_t len;
+    // The string as a UTF-8 non-null-terminated string.
+    char string[];
+};
+
+// Union of possible values.
+union gguf_value {
+    uint8_t uint8;
+    int8_t int8;
+    uint16_t uint16;
+    int16_t int16;
+    uint32_t uint32;
+    int32_t int32;
+    float float32;
+    uint64_t uint64;
+    int64_t int64;
+    double float64;
+    uint8_t boolval;
+    struct gguf_string string;
+    struct {
+        // Any value type is valid, including arrays.
+        uint32_t type;
+        // Number of elements, not bytes
+        uint64_t len;
+        // The array of values follow...
+    } __attribute__((packed)) array;
+};
+
+// Header
+struct gguf_header {
+    // Magic number to announce that this is a GGUF file.
+    // Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
+    uint32_t magic;
+    // The version of the format implemented.
+    // Must be `3` for version described in this spec.
+    uint32_t version;
+    // The number of tensors in the file.
+    // This is explicit, instead of being included in the metadata, to ensure
+    // it is always present for loading the tensors.
+    uint64_t tensor_count;
+    // The number of metadata key-value pairs.
+    uint64_t metadata_kv_count;
+};
+
+/* Key representation in this library API. */
+typedef struct {
+    const char *name;
+    size_t namelen;
+    uint32_t type;
+    union gguf_value *val;
+} gguf_key;
+
+typedef struct {
+    const char *name;
+    size_t namelen;
+    int type;
+} gguf_value;
+
+#endif /* GGUF_H */