Files
gguf-tools/gguflib.h
2023-12-28 12:31:35 +01:00

185 lines
6.8 KiB
C

/* Copyright (C) 2024 Salvatore Sanfilippo <antirez@gmail.com>
* See LICENSE for licensing info.
*
* GGUF enums / structures are partially adapted
* the official GGUF implementation at from https://github.com/ggerganov/ggml/
*/
#ifndef GGUFLIB_H
#define GGUFLIB_H
#include <stdint.h>
/* ============================ Enums and structures ======================== */
enum gguf_tensor_type {
GGUF_TYPE_F32 = 0,
GGUF_TYPE_F16 = 1,
GGUF_TYPE_Q4_0 = 2,
GGUF_TYPE_Q4_1 = 3,
// GGUF_TYPE_Q4_2 = 4, support has been removed
// GGUF_TYPE_Q4_3 (5) support has been removed
GGUF_TYPE_Q5_0 = 6,
GGUF_TYPE_Q5_1 = 7,
GGUF_TYPE_Q8_0 = 8,
GGUF_TYPE_Q8_1 = 9,
// k-quantizations
GGUF_TYPE_Q2_K = 10,
GGUF_TYPE_Q3_K = 11,
GGUF_TYPE_Q4_K = 12,
GGUF_TYPE_Q5_K = 13,
GGUF_TYPE_Q6_K = 14,
GGUF_TYPE_Q8_K = 15,
GGUF_TYPE_I8,
GGUF_TYPE_I16,
GGUF_TYPE_I32,
GGUF_TYPE_COUNT,
};
enum gguf_value_type {
// The value is a 8-bit unsigned integer.
GGUF_VALUE_TYPE_UINT8 = 0,
// The value is a 8-bit signed integer.
GGUF_VALUE_TYPE_INT8 = 1,
// The value is a 16-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT16 = 2,
// The value is a 16-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT16 = 3,
// The value is a 32-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT32 = 4,
// The value is a 32-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT32 = 5,
// The value is a 32-bit IEEE754 floating point number.
GGUF_VALUE_TYPE_FLOAT32 = 6,
// The value is a boolean.
// 1-byte value where 0 is false and 1 is true.
// Anything else is invalid, and should be treated as either the model
// being invalid or the reader being buggy.
GGUF_VALUE_TYPE_BOOL = 7,
// The value is a UTF-8 non-null-terminated string, with length prepended.
GGUF_VALUE_TYPE_STRING = 8,
// The value is an array of other values, with the length and type
// prepended. Arrays can be nested, and the length of the array is the
// number of elements in the array, not the number of bytes.
GGUF_VALUE_TYPE_ARRAY = 9,
// The value is a 64-bit unsigned little-endian integer.
GGUF_VALUE_TYPE_UINT64 = 10,
// The value is a 64-bit signed little-endian integer.
GGUF_VALUE_TYPE_INT64 = 11,
// The value is a 64-bit IEEE754 floating point number.
GGUF_VALUE_TYPE_FLOAT64 = 12,
// Special values used by the callbacks of gguf_do_with_value().
GGUF_VALUE_TYPE_ARRAY_START = 100,
GGUF_VALUE_TYPE_ARRAY_END = 101
};
// A string in GGUF.
struct gguf_string {
// The length of the string, in bytes.
uint64_t len;
// The string as a UTF-8 non-null-terminated string.
char string[];
};
// Union of possible values.
union gguf_value {
uint8_t uint8;
int8_t int8;
uint16_t uint16;
int16_t int16;
uint32_t uint32;
int32_t int32;
float float32;
uint64_t uint64;
int64_t int64;
double float64;
uint8_t boolval;
struct gguf_string string;
struct {
// Any value type is valid, including arrays.
uint32_t type;
// Number of elements, not bytes
uint64_t len;
// The array of values follow...
} __attribute__((packed)) array;
};
// Header
struct gguf_header {
// Magic number to announce that this is a GGUF file.
// Must be `GGUF` at the byte level: `0x47` `0x47` `0x55` `0x46`.
uint32_t magic;
// The version of the format implemented.
// Must be `3` for version described in this spec.
uint32_t version;
// The number of tensors in the file.
// This is explicit, instead of being included in the metadata, to ensure
// it is always present for loading the tensors.
uint64_t tensor_count;
// The number of metadata key-value pairs.
uint64_t metadata_kv_count;
};
/* Key represneation in this library API. */
typedef struct {
const char *name;
size_t namelen;
uint32_t type;
union gguf_value *val;
} gguf_key;
/* Tensor representation in this library API. */
#define GGUF_TENSOR_MAX_DIM 8 // Future-proof: actual limit is 4.
typedef struct {
const char *name;
size_t namelen;
uint32_t type; // Tensor type (enum gguf_tensor_type).
uint32_t ndim; // Number of dimensions of the tensor.
uint64_t dim[GGUF_TENSOR_MAX_DIM]; // Dimensions (Eg. [512, 1024, 1, 1]).
uint64_t offset; // Offset from start of file.
uint64_t bsize; // Total size in bytes.
uint64_t num_weights; // Total number of parameters.
uint8_t *weights_data; // Pointer to the mmaped file.
} gguf_tensor;
/* The context you get after opening a GGUF file with gguf_init(). */
typedef struct {
int fd;
uint8_t *data; // Memory mapped data.
uint64_t size; // Total file size.
struct gguf_header *header; // GUFF file header info.
uint32_t left_kv; // Number of key-value pairs yet to read.
uint32_t left_tensors; // Number of tensors yet to read.
uint64_t off; // Offset of the next item to parse.
uint64_t data_off; // Offset of tensor data section. This
// is only set when all the kv/tensor header
// entries are processed. Initially 0.
uint64_t alignment; // File data alignment. Default: 32 bytes.
} gguf_ctx;
/* =============================== Prototypes =============================== */
gguf_ctx *gguf_init(const char *filename);
gguf_ctx *gguf_create(const char *filename);
int gguf_remap(gguf_ctx *ctx);
void gguf_rewind(gguf_ctx *ctx);
void gguf_end(gguf_ctx *ctx);
int gguf_get_key(gguf_ctx *ctx, gguf_key *key);
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor);
const char *gguf_get_value_type_name(uint32_t type);
const char *gguf_get_tensor_type_name(uint32_t type);
void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
void *privdata, uint64_t in_array, uint64_t array_len,
void(*callback)(void *privdata, uint32_t type,
union gguf_value *val, uint64_t in_array,
uint64_t array_len));
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
void gguf_skip_key_values_section(gguf_ctx *ctx);
float *gguf_tensor_to_float(gguf_tensor *tensor);
#endif