GGUF parsing, initial design and functionalities.

This commit is contained in:
antirez
2023-12-24 10:36:26 +01:00
commit b47eaca8d1
4 changed files with 415 additions and 0 deletions

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 Georgi Gerganov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

7
Makefile Normal file
View File

@@ -0,0 +1,7 @@
# Build rules for the gguf-show command line tool.
#
# 'all' and 'clean' are not files: declaring them phony keeps them working
# even if a file with the same name appears in the directory.
.PHONY: all clean

all: gguf-show

# gguf.h is a prerequisite so that editing the header triggers a rebuild.
gguf-show: gguf-show.c gguf.h
	$(CC) gguf-show.c -g -ggdb -Wall -W -pedantic -O2 -o gguf-show

clean:
	rm -rf gguf-show

254
gguf-show.c Normal file
View File

@@ -0,0 +1,254 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "gguf.h"
/* Parsing context of a single memory mapped GGUF file. It is created by
 * gguf_init() and released by gguf_end(). */
typedef struct {
    int fd;                     // Descriptor of the open file.
    uint8_t *data;              // Memory mapped data.
    uint64_t size;              // Total file size.
    struct gguf_header *header; // GGUF file header info.
    /* The two counters are loaded from the 64 bit header fields, so they
     * must be 64 bit as well or large counts would be silently truncated. */
    uint64_t left_kv;           // Number of key-value pairs yet to read.
    uint64_t left_tensors;      // Number of tensors yet to read.
    uint64_t off;               // Offset of the next item to parse.
} gguf_ctx;
/* Open a GGUF file and return a parsing context, to be released with
 * gguf_end().
 *
 * On error NULL is returned and nothing is left open or mapped. errno is
 * the one set by the failing call (open, fstat, mmap, malloc), or EINVAL
 * if the file is too small to contain the header or does not start with
 * the "GGUF" magic. */
gguf_ctx *gguf_init(char *filename) {
    int fd = open(filename,O_RDONLY);
    if (fd == -1) return NULL;

    struct stat sb;
    void *mapped = MAP_FAILED;
    gguf_ctx *ctx = NULL;
    int saved_errno;

    if (fstat(fd,&sb) == -1) goto error;

    /* Minimal sanity check on the size, done before mapping so that we
     * never try to map an empty file nor read a header that is not
     * entirely there. */
    if (sb.st_size < (off_t)sizeof(struct gguf_header)) {
        errno = EINVAL;
        goto error;
    }

    /* Now that we have an open file and its total size, let's
     * mmap it. */
    mapped = mmap(NULL,sb.st_size,PROT_READ,MAP_PRIVATE,fd,0);
    if (mapped == MAP_FAILED) goto error;

    if (memcmp(mapped,"GGUF",4) != 0) {
        errno = EINVAL;
        goto error;
    }

    /* Mapping successful. We can create our context object. */
    ctx = malloc(sizeof(*ctx));
    if (ctx == NULL) goto error;

    ctx->fd = fd;
    ctx->data = mapped;
    ctx->header = mapped;
    ctx->size = sb.st_size;
    ctx->off = sizeof(struct gguf_header);
    ctx->left_kv = ctx->header->metadata_kv_count;
    ctx->left_tensors = ctx->header->tensor_count;
    return ctx;

error:
    /* munmap() and close() may modify errno: preserve the value that
     * describes the original failure for the caller. */
    saved_errno = errno;
    if (mapped != MAP_FAILED) munmap(mapped,sb.st_size);
    close(fd);
    errno = saved_errno;
    return NULL;
}
/* Dispose a context obtained with gguf_init(): the file is unmapped,
 * its descriptor closed and the context memory freed. Calling the
 * function with a NULL context is allowed and does nothing. */
void gguf_end(gguf_ctx *ctx) {
    if (ctx != NULL) {
        munmap(ctx->data,ctx->size);
        close(ctx->fd);
        free(ctx);
    }
}
/* Parse the next key. Returns key information into 'key'.
 * The function return value is 1 if a key was returned, or 0
 * if there are no more keys to process in this GGUF file.
 *
 * 0 is also returned, leaving the context untouched, if the mapped data
 * ends before a whole key (8 bytes length + name + 4 bytes value type)
 * can be read: a truncated or corrupted file stops the iteration instead
 * of causing reads outside the mapping.
 *
 * On success key->name points inside the mapped file (it is not null
 * terminated, use key->namelen), key->val points to the first byte of
 * the value, and the context offset is left at the value. */
int gguf_get_key(gguf_ctx *ctx, gguf_key *key) {
    if (ctx->left_kv == 0) return 0;

    /* The key name is a GGUF string: a 64 bit length followed by the
     * string bytes. Make sure the length itself is inside the file. */
    if (ctx->off > ctx->size || ctx->size - ctx->off < 8) return 0;

    /* Items are stored at arbitrary offsets: memcpy() is used to load
     * them without assuming the address is suitably aligned. */
    uint64_t namelen;
    memcpy(&namelen,ctx->data+ctx->off,sizeof(namelen));

    /* Name and value type must fit in what is left. The comparison is
     * written as subtractions so that a huge length can't overflow. */
    uint64_t avail = ctx->size - ctx->off - 8;
    if (namelen > avail || avail - namelen < 4) return 0;

    uint32_t type;
    memcpy(&type,ctx->data+ctx->off+8+namelen,sizeof(type));

    key->name = (const char*)(ctx->data+ctx->off+8);
    key->namelen = namelen;
    key->type = type;
    ctx->off += 8+namelen+4; // Skip prefixed len + string + type.
    key->val = (void*)(ctx->data+ctx->off);
    ctx->left_kv--;
    return 1;
}
/* Map a value type ID to its human readable name. IDs that have no
 * entry in the gguf_value_name table are reported as "unknown". */
const char *gguf_get_value_type_name(uint32_t type) {
    const size_t known = sizeof(gguf_value_name)/sizeof(gguf_value_name[0]);
    return type < known ? gguf_value_name[type] : "unknown";
}
/* Return how many bytes the value pointed by 'val', of type 'type',
 * takes in the file. Strings account for their 8 bytes length prefix
 * plus the string bytes. The size of an array can't be known without
 * walking its elements, so for arrays, as well as for any type not
 * listed here, 0 is returned. */
uint64_t gguf_value_len(uint32_t type, union gguf_value *val) {
    switch(type) {
    case GGUF_VALUE_TYPE_UINT8:
    case GGUF_VALUE_TYPE_INT8:
    case GGUF_VALUE_TYPE_BOOL:
        return 1;
    case GGUF_VALUE_TYPE_UINT16:
    case GGUF_VALUE_TYPE_INT16:
        return 2;
    case GGUF_VALUE_TYPE_UINT32:
    case GGUF_VALUE_TYPE_INT32:
    case GGUF_VALUE_TYPE_FLOAT32:
        return 4;
    case GGUF_VALUE_TYPE_UINT64:
    case GGUF_VALUE_TYPE_INT64:
    case GGUF_VALUE_TYPE_FLOAT64:
        return 8;
    case GGUF_VALUE_TYPE_STRING:
        return 8+val->string.len;
    default:
        return 0;
    }
}
/* This function can be called after gguf_get_key(), since the context
 * offset will be in the position of a value.
 *
 * The function will process the value, including nested values (in the
 * case of an array value), and for each value will call the specified
 * callback. As a side effect of calling this function, the context offset
 * is advanced to consume the value.
 *
 * If the callback is set to NULL, no callback will be called,
 * but the value will be consumed, so that it will be possible
 * to call gguf_get_key() or gguf_get_tensor() to continue reading
 * the file.
 *
 * When the callback is called, it gets the argument 'privdata' as passed
 * to this function. This is useful if the callback needs to take state
 * (for pretty printing or alike).
 *
 * For the elements of an array the callback receives in 'in_array' the
 * 1-based index of the element being processed and in 'array_len' the
 * number of elements of the array. For a value that is not part of an
 * array it receives the 'in_array' and 'array_len' given by the caller
 * (top level callers pass 0 for both).
 *
 * In the case of arrays, callbacks are also called with the special
 * type ARRAY_START / ARRAY_END at the start/end of the array
 * processing. In these two calls 'in_array' is the position of the array
 * itself inside its parent (0 at top level) and 'array_len' is the number
 * of elements of the array. The ARRAY_END call receives a NULL 'val'. */
void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                        void *privdata, uint64_t in_array, uint64_t array_len,
                        void(*callback)(void *privdata, uint32_t type,
                                     union gguf_value *val, uint64_t in_array,
                                     uint64_t array_len))
{
    if (type == GGUF_VALUE_TYPE_ARRAY) {
        uint32_t etype = val->array.type; // Elements type.
        uint64_t len = val->array.len;    // Number of elements.

        ctx->off += 4+8; // Skip elements type / array length.
        if (callback)
            callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
        for (uint64_t j = 0; j < len; j++) {
            /* As a side effect of calling gguf_do_with_value() ctx->off
             * is updated, so at every iteration 'val' is set to the
             * next element. */
            val = (union gguf_value*)(ctx->data+ctx->off);
            gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
        }
        if (callback)
            callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
    } else {
        if (callback) callback(privdata,type,val,in_array,array_len);
        ctx->off += gguf_value_len(type,val);
    }
}
/* Options meant to be handed to gguf_print_value_callback() through its
 * 'privdata' argument. NOTE(review): the callback currently discards
 * 'privdata', so in the visible code this limit is not enforced yet. */
struct gguf_print_options {
uint64_t max_array_items; // Don't print more than N items.
};
/* Print a GGUF value on the standard output. 'privdata' is used to pass
 * gguf_print_options and may be NULL if no options are provided (right
 * now the argument is ignored).
 *
 * 'type' is the type of the value pointed by 'val', or one of the special
 * ARRAY_START / ARRAY_END types. When 'in_array' is non zero and different
 * from 'array_len', a ", " separator is emitted after the value.
 *
 * The function is designed to be used as a callback of gguf_do_with_value(). */
void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) {
    (void) privdata;

    /* The 64 bit integers are cast to (unsigned) long long since that is
     * the type %llu / %lld expect: uint64_t and int64_t are not
     * guaranteed to be the same type on every platform. */
    switch (type) {
    case GGUF_VALUE_TYPE_ARRAY_START:
        printf("[(%llu items)",(unsigned long long)array_len); break;
    case GGUF_VALUE_TYPE_ARRAY_END:
        printf("]"); break;
    case GGUF_VALUE_TYPE_UINT8:
        printf("%u", val->uint8); break;
    case GGUF_VALUE_TYPE_INT8:
        printf("%d", val->int8); break;
    case GGUF_VALUE_TYPE_UINT16:
        printf("%u", val->uint16); break;
    case GGUF_VALUE_TYPE_INT16:
        printf("%d", val->int16); break;
    case GGUF_VALUE_TYPE_UINT32:
        printf("%u", val->uint32); break;
    case GGUF_VALUE_TYPE_INT32:
        printf("%d", val->int32); break;
    case GGUF_VALUE_TYPE_FLOAT32:
        printf("%f", val->float32); break;
    case GGUF_VALUE_TYPE_BOOL:
        if (val->boolval == 0 || val->boolval == 1)
            printf("%s", val->boolval ? "true" : "false");
        else
            printf("Invalid boolean value %d", val->boolval);
        break;
    case GGUF_VALUE_TYPE_STRING:
        printf("%.*s", (int)val->string.len, val->string.string); break;
    case GGUF_VALUE_TYPE_UINT64:
        printf("%llu", (unsigned long long)val->uint64); break;
    case GGUF_VALUE_TYPE_INT64:
        printf("%lld", (long long)val->int64); break;
    case GGUF_VALUE_TYPE_FLOAT64:
        printf("%lf", val->float64); break;
    default:
        printf("Unknown type\n");
        break;
    }
    if (in_array && in_array != array_len) printf(", ");
}
/* Pretty print the value at the current context position, arrays
 * included, by walking it with gguf_print_value_callback(). The value
 * is consumed in the process: once the function returns the context
 * is positioned on the item that follows it in the GGUF file. */
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val) {
    void *no_options = NULL;         // No print options are passed.
    const uint64_t not_in_array = 0; // Top level value: no index, no length.

    gguf_do_with_value(ctx,type,val,no_options,not_in_array,not_in_array,
                       gguf_print_value_callback);
}
/* gguf-show entry point: takes the GGUF file name as only argument and
 * prints a summary of the header followed by every metadata key-value
 * pair, one per line. Exits with status 1 if the arguments are wrong or
 * the file can't be opened as GGUF, otherwise returns 0. */
int main(int argc, char **argv) {
    if (argc != 2) {
        printf("Usage: %s <filename>\n",argv[0]);
        exit(1);
    }

    gguf_ctx *ctx = gguf_init(argv[1]);
    if (ctx == NULL) {
        perror("Opening GGUF file");
        exit(1);
    }

    /* Show general information about the neural network. */
    printf("%s (ver %d): %llu key-value pairs, %llu tensors\n",
        argv[1],
        (int)ctx->header->version,
        (unsigned long long)ctx->header->metadata_kv_count,
        (unsigned long long)ctx->header->tensor_count);

    /* Show all the key-value pairs. Printing a value also consumes it,
     * leaving the context ready for the next gguf_get_key() call. */
    gguf_key key;
    while (gguf_get_key(ctx,&key)) {
        printf("%.*s: [%s] ", (int)key.namelen, key.name, gguf_get_value_type_name(key.type));
        gguf_print_value(ctx,key.type,key.val);
        printf("\n");
    }

    /* Unmap and close the file, release the context. */
    gguf_end(ctx);
    return 0;
}

133
gguf.h Normal file
View File

@@ -0,0 +1,133 @@
/* This code is adapted from https://github.com/ggerganov/ggml/
* The changes are copyright (C) 2024 Salvatore Sanfilippo <antirez@gmail.com>
* See LICENSE for licensing info. */
#include <stdint.h>
/* Identifiers of the tensor data types. This header is adapted from ggml,
 * so the numbering presumably mirrors ggml's tensor types - TODO confirm
 * against the GGUF specification.
 *
 * Note that the constants are spelled GUFF_ (not GGUF_): the spelling is
 * part of the names exposed by this header. The IDs 4 and 5 are not
 * assigned since the corresponding formats were removed. */
enum gguf_tensor_type {
GUFF_TYPE_F32 = 0,
GUFF_TYPE_F16 = 1,
GUFF_TYPE_Q4_0 = 2,
GUFF_TYPE_Q4_1 = 3,
// GUFF_TYPE_Q4_2 = 4, support has been removed
// GUFF_TYPE_Q4_3 (5) support has been removed
GUFF_TYPE_Q5_0 = 6,
GUFF_TYPE_Q5_1 = 7,
GUFF_TYPE_Q8_0 = 8,
GUFF_TYPE_Q8_1 = 9,
// k-quantizations
GUFF_TYPE_Q2_K = 10,
GUFF_TYPE_Q3_K = 11,
GUFF_TYPE_Q4_K = 12,
GUFF_TYPE_Q5_K = 13,
GUFF_TYPE_Q6_K = 14,
GUFF_TYPE_Q8_K = 15,
// The following have no explicit value: they continue the sequence
// after Q8_K, that is, 16, 17 and 18.
GUFF_TYPE_I8,
GUFF_TYPE_I16,
GUFF_TYPE_I32,
// Not a type: one past the last ID (19).
GUFF_TYPE_COUNT,
};
/* Type IDs of the metadata values. The IDs from 0 to 12 are also the
 * indexes of the corresponding names in the gguf_value_name table. */
enum gguf_value_type {
// The value is an 8-bit unsigned integer.
GGUF_VALUE_TYPE_UINT8 = 0,
// The value is an 8-bit signed integer.
GGUF_VALUE_TYPE_INT8 = 1,
// The value is a 16-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT16 = 2,
// The value is a 16-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT16 = 3,
// The value is a 32-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT32 = 4,
// The value is a 32-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT32 = 5,
// The value is a 32-bit IEEE754 floating point number.
GGUF_VALUE_TYPE_FLOAT32 = 6,
// The value is a boolean.
// 1-byte value where 0 is false and 1 is true.
// Anything else is invalid, and should be treated as either the model
// being invalid or the reader being buggy.
GGUF_VALUE_TYPE_BOOL = 7,
// The value is a UTF-8 non-null-terminated string, with length prepended.
GGUF_VALUE_TYPE_STRING = 8,
// The value is an array of other values, with the length and type
// prepended. Arrays can be nested, and the length of the array is the
// number of elements in the array, not the number of bytes.
GGUF_VALUE_TYPE_ARRAY = 9,
// The value is a 64-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT64 = 10,
// The value is a 64-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT64 = 11,
// The value is a 64-bit IEEE754 floating point number.
GGUF_VALUE_TYPE_FLOAT64 = 12,
// Special values used by the callbacks of gguf_do_with_value(): they are
// passed as 'type' to signal the start and the end of an array, and have
// no entry in gguf_value_name.
GGUF_VALUE_TYPE_ARRAY_START = 100,
GGUF_VALUE_TYPE_ARRAY_END = 101
};
/* Human readable names of the value types, indexed by type ID: the order
 * must match the IDs from 0 to 12 of enum gguf_value_type.
 *
 * The table is 'static const' because it lives in a header: every file
 * including it gets its own read-only copy, instead of a global symbol
 * defined multiple times at link time. */
static const char *const gguf_value_name[] = {
    "uint8", "int8", "uint16", "int16", "uint32", "int32",
    "float32", "bool", "string", "array", "uint64", "int64",
    "float64"
};
// A string in GGUF: a 64 bit length immediately followed by the string
// bytes. The structure is used as a view over the mapped file data, which
// is why the bytes are declared as a flexible array member.
struct gguf_string {
// The length of the string, in bytes.
uint64_t len;
// The string as a UTF-8 non-null-terminated string.
char string[];
};
// Union of possible values. A pointer to this union is obtained by casting
// an address inside the mapped file, and the member to read is selected
// according to the value type ID (see enum gguf_value_type).
union gguf_value {
uint8_t uint8;
int8_t int8;
uint16_t uint16;
int16_t int16;
uint32_t uint32;
int32_t int32;
float float32;
uint64_t uint64;
int64_t int64;
double float64;
// 1-byte boolean: 0 is false, 1 is true, anything else is invalid.
uint8_t boolval;
// Length-prefixed string: the bytes follow the length.
struct gguf_string string;
// Array header. It is packed so that 'len' immediately follows the
// 4 bytes of 'type' with no padding, for a total of 12 bytes.
struct {
// Any value type is valid, including arrays.
uint32_t type;
// Number of elements, not bytes
uint64_t len;
// The array of values follow...
} __attribute__((packed)) array;
};
// Header found at the very start of a GGUF file. gguf_init() points a
// structure of this type directly at the beginning of the mapped file,
// so the fields are read as they are stored.
struct gguf_header {
// Magic number to announce that this is a GGUF file.
// Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
uint32_t magic;
// The version of the format implemented.
// Must be `3` for version described in this spec.
uint32_t version;
// The number of tensors in the file.
// This is explicit, instead of being included in the metadata, to ensure
// it is always present for loading the tensors.
uint64_t tensor_count;
// The number of metadata key-value pairs.
uint64_t metadata_kv_count;
};
/* Key representation in this library API, as filled by gguf_get_key().
 * The pointers reference the memory mapped file, nothing is copied. */
typedef struct {
const char *name; // Key name, NOT null terminated: use 'namelen'.
size_t namelen; // Length of the name in bytes.
uint32_t type; // Type ID of the value (see enum gguf_value_type).
union gguf_value *val; // First byte of the value associated to the key.
} gguf_key;
/* Name, name length and type ID grouped together.
 * NOTE(review): nothing in the code of this commit uses this type. It
 * also shares its name with 'union gguf_value' (a different namespace in
 * C, so there is no clash) - confirm the intended purpose before use. */
typedef struct {
const char *name;
size_t namelen;
int type;
} gguf_value;