Files
gguf-tools/gguflib.h
Justine Tunney ede59bb742 Add BF16 support and fix warnings
This change updates the data type definitions to match the latest
source code. Support for the bfloat16 data type is now available;
however, the IQ quantization formats can't be interpreted yet. Compiler
warnings and other nits have been cleaned up, but behavioral changes
have been avoided, and no new features have been added as of yet.
2024-05-25 22:58:50 -07:00

202 lines
7.5 KiB
C

/* Copyright (C) 2024 Salvatore Sanfilippo <antirez@gmail.com>
* See LICENSE for licensing info.
*
* GGUF enums / structures are partially adapted from
* the official GGUF implementation at https://github.com/ggerganov/ggml/
*/
#ifndef GGUFLIB_H
#define GGUFLIB_H
#include <stddef.h>
#include <stdint.h>
/* ============================ Enums and structures ======================== */
/* Generic flags: a given flag keeps the same meaning in every function
 * that accepts it. */
#define GGUF_NONE       0           /* No flags. */
#define GGUF_OVERWRITE  (1 << 0)    /* Overwrite the destination object. */
/* Tensor type ids.
 *
 * Every enumerator carries an explicit number. Ids 4 and 5 are left
 * unused on purpose: they belonged to Q4_2 and Q4_3, whose support has
 * been removed.
 *
 * NOTE(review): the numbers presumably mirror the type ids used by the
 * official GGUF implementation -- confirm before renumbering anything. */
enum gguf_tensor_type {
    GGUF_TYPE_F32     = 0,
    GGUF_TYPE_F16     = 1,
    GGUF_TYPE_Q4_0    = 2,
    GGUF_TYPE_Q4_1    = 3,
    /* 4: GGUF_TYPE_Q4_2, support has been removed. */
    /* 5: GGUF_TYPE_Q4_3, support has been removed. */
    GGUF_TYPE_Q5_0    = 6,
    GGUF_TYPE_Q5_1    = 7,
    GGUF_TYPE_Q8_0    = 8,
    GGUF_TYPE_Q8_1    = 9,
    GGUF_TYPE_Q2_K    = 10,
    GGUF_TYPE_Q3_K    = 11,
    GGUF_TYPE_Q4_K    = 12,
    GGUF_TYPE_Q5_K    = 13,
    GGUF_TYPE_Q6_K    = 14,
    GGUF_TYPE_Q8_K    = 15,
    GGUF_TYPE_IQ2_XXS = 16,
    GGUF_TYPE_IQ2_XS  = 17,
    GGUF_TYPE_IQ3_XXS = 18,
    GGUF_TYPE_IQ1_S   = 19,
    GGUF_TYPE_IQ4_NL  = 20,
    GGUF_TYPE_IQ3_S   = 21,
    GGUF_TYPE_IQ2_S   = 22,
    GGUF_TYPE_IQ4_XS  = 23,
    GGUF_TYPE_I8      = 24,
    GGUF_TYPE_I16     = 25,
    GGUF_TYPE_I32     = 26,
    GGUF_TYPE_I64     = 27,
    GGUF_TYPE_F64     = 28,
    GGUF_TYPE_IQ1_M   = 29,
    GGUF_TYPE_BF16    = 30,
    GGUF_TYPE_COUNT,    /* One past the highest id above (evaluates to 31). */
};
/* Value type ids. */
enum gguf_value_type {
    GGUF_VALUE_TYPE_UINT8   = 0,    // 8-bit unsigned integer.
    GGUF_VALUE_TYPE_INT8    = 1,    // 8-bit signed integer.
    GGUF_VALUE_TYPE_UINT16  = 2,    // 16-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_INT16   = 3,    // 16-bit signed little-endian integer.
    GGUF_VALUE_TYPE_UINT32  = 4,    // 32-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_INT32   = 5,    // 32-bit signed little-endian integer.
    GGUF_VALUE_TYPE_FLOAT32 = 6,    // 32-bit IEEE754 floating point number.

    // Boolean, stored as a 1-byte value: 0 is false, 1 is true. Any other
    // byte is invalid and should be treated as either the model being
    // invalid or the reader being buggy.
    GGUF_VALUE_TYPE_BOOL    = 7,

    // UTF-8 string, not null-terminated, with its length prepended.
    GGUF_VALUE_TYPE_STRING  = 8,

    // Array of other values, with the length and the element type
    // prepended. Arrays can be nested. The length counts elements,
    // not bytes.
    GGUF_VALUE_TYPE_ARRAY   = 9,

    GGUF_VALUE_TYPE_UINT64  = 10,   // 64-bit unsigned little-endian integer.
    GGUF_VALUE_TYPE_INT64   = 11,   // 64-bit signed little-endian integer.
    GGUF_VALUE_TYPE_FLOAT64 = 12,   // 64-bit IEEE754 floating point number.

    // Special values used by the callbacks of gguf_do_with_value().
    GGUF_VALUE_TYPE_ARRAY_START = 100,
    GGUF_VALUE_TYPE_ARRAY_END   = 101
};
/* A string in GGUF: a 64-bit length followed directly by the bytes. */
struct gguf_string {
    uint64_t len;       /* Length of the string, in bytes. */
    char string[];      /* UTF-8 bytes, without a null terminator. */
};
/* Union of every representation a GGUF value can take; read the member
 * that corresponds to the value's type. */
union gguf_value {
    /* Scalars. */
    uint8_t  uint8;
    int8_t   int8;
    uint16_t uint16;
    int16_t  int16;
    uint32_t uint32;
    int32_t  int32;
    float    float32;
    uint64_t uint64;
    int64_t  int64;
    double   float64;
    uint8_t  boolval;

    /* Length-prefixed string. */
    struct gguf_string string;

    /* Array header. The struct is packed, so 'len' sits immediately after
     * 'type' with no padding in between.
     * NOTE(review): presumably packed so it can overlay the bytes of the
     * file directly -- confirm against the reader code. */
    struct {
        uint32_t type;  /* Element type: any value type, arrays included. */
        uint64_t len;   /* Number of elements, not bytes. */
        /* The array of values follows... */
    } __attribute__((packed)) array;
};
/* GGUF file header. */
struct gguf_header {
    /* Magic number announcing a GGUF file. At the byte level it must read
     * `GGUF`: `0x47` `0x47` `0x55` `0x46`. */
    uint32_t magic;

    /* Implemented format version. Must be `3` for the version described
     * in this spec. */
    uint32_t version;

    /* Number of tensors in the file. Kept explicit here, rather than in
     * the metadata, so that it is always present for loading the tensors. */
    uint64_t tensor_count;

    /* Number of metadata key-value pairs. */
    uint64_t metadata_kv_count;
};
/* A key as exposed by this library's API (filled in by gguf_get_key()). */
typedef struct {
    const char *name;       /* Key name; its length is given by 'namelen'.
                             * NOTE(review): presumably not null-terminated
                             * -- confirm in the implementation. */
    size_t namelen;         /* Length of 'name'. */
    uint32_t type;          /* Type id of the value.
                             * NOTE(review): presumably one of
                             * enum gguf_value_type -- confirm. */
    union gguf_value *val;  /* The value associated with the key. */
} gguf_key;
/* A tensor as exposed by this library's API (filled in by
 * gguf_get_tensor()). */
#define GGUF_TENSOR_MAX_DIM 8   /* Future-proof: actual limit is 4. */
typedef struct {
    const char *name;                   /* Tensor name; length in 'namelen'. */
    size_t namelen;                     /* Length of 'name'. */
    uint32_t type;                      /* Tensor type (enum gguf_tensor_type). */
    uint32_t ndim;                      /* Number of dimensions of the tensor. */
    uint64_t dim[GGUF_TENSOR_MAX_DIM];  /* Dimensions (Eg. [512, 1024, 1, 1]). */
    uint64_t offset;                    /* Offset from start of file. */
    uint64_t bsize;                     /* Total size in bytes. */
    uint64_t num_weights;               /* Total number of parameters. */
    uint8_t *weights_data;              /* Pointer to the mmaped file. */
} gguf_tensor;
/* The context returned by gguf_open() and gguf_create(); every other
 * function of the API that works on a file takes it as first argument. */
typedef struct {
    int fd;
    uint8_t *data;              /* Memory mapped data. */
    uint64_t size;              /* Total file size. */
    struct gguf_header *header; /* GGUF file header info. */
    uint64_t left_kv;           /* Number of key-value pairs yet to read. */
    uint64_t left_tensors;      /* Number of tensors yet to read. */
    uint64_t off;               /* Offset of the next item to parse. */
    uint64_t data_off;          /* Offset of tensor data section. This is
                                 * only set when all the kv/tensor header
                                 * entries are processed. Initially 0. */
    uint64_t alignment;         /* File data alignment. Default: 32 bytes. */
} gguf_ctx;
/* =============================== Prototypes =============================== */
/* Context lifecycle. gguf_open() and gguf_create() return the gguf_ctx
 * used by all the other calls.
 * NOTE(review): failure is presumably signalled by a NULL return, and
 * 'flags' presumably takes GGUF_NONE / GGUF_OVERWRITE -- confirm both in
 * the implementation. */
gguf_ctx *gguf_open(const char *filename);
gguf_ctx *gguf_create(const char *filename, int flags);
int gguf_remap(gguf_ctx *ctx);
void gguf_rewind(gguf_ctx *ctx);
void gguf_close(gguf_ctx *ctx);

/* Fill the caller-provided gguf_key / gguf_tensor.
 * NOTE(review): presumably these read the next entry and advance the
 * context (see left_kv / left_tensors / off in gguf_ctx); the meaning of
 * the int return value is not visible from this header -- confirm. */
int gguf_get_key(gguf_ctx *ctx, gguf_key *key);
int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor);

/* Human readable names for value type and tensor type ids. */
const char *gguf_get_value_type_name(uint32_t type);
const char *gguf_get_tensor_type_name(uint32_t type);

/* Invoke 'callback' on a value. For arrays the callback also receives the
 * special types GGUF_VALUE_TYPE_ARRAY_START / GGUF_VALUE_TYPE_ARRAY_END
 * (see enum gguf_value_type). 'privdata' is handed to the callback as its
 * first argument. */
void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                        void *privdata, uint64_t in_array, uint64_t array_len,
                        void(*callback)(void *privdata, uint32_t type,
                                        union gguf_value *val,
                                        uint64_t in_array,
                                        uint64_t array_len));
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                      int full);

/* Appending to a file.
 * NOTE(review): the required call order and the meaning of the int return
 * values are not visible from this header -- confirm in the
 * implementation. */
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen,
                   uint32_t type, void *val, uint64_t len);
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname,
                            uint64_t namelen, uint32_t num_dim,
                            uint64_t *dim, uint32_t type, uint64_t offset);
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor,
                            uint64_t tensor_size);

/* Helpers. */
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
void gguf_skip_key_values_section(gguf_ctx *ctx);

/* Tensor conversion to float / f16 / bf16 arrays.
 * NOTE(review): ownership of the returned pointer (presumably heap
 * allocated, to be freed by the caller) and the result for unsupported
 * tensor types are not visible from this header -- confirm. */
float *gguf_tensor_to_float(gguf_tensor *tensor);
int16_t *gguf_tensor_to_f16(gguf_tensor *tensor);
int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor);
#endif