Add BF16 support and fix warnings

This change updates the data type definitions to match the latest
upstream source code. Support for the bfloat16 data type is now
available, although the IQ quantization formats can't be interpreted
yet. Compiler warnings and other nits have been cleaned up, while
behavioral changes have been avoided and no new features added.
Justine Tunney
2024-05-25 22:48:18 -07:00
parent 3e5c0a464d
commit ede59bb742
6 changed files with 201 additions and 27 deletions
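For context on the bfloat16 support added above (not part of this
commit): bf16 is simply the upper sixteen bits of an IEEE-754 float32,
keeping the sign, the full 8-bit exponent, and the top 7 mantissa bits,
so widening it back to float is a plain shift. A minimal sketch, with a
hypothetical helper name:

#include <stdint.h>
#include <string.h>

/* Widen one bf16 value (passed as raw bits) to float by shifting it
 * into the high half of a float32 bit pattern. */
static float bf16_to_float(uint16_t bits) {
    uint32_t u = (uint32_t)bits << 16;
    float f;
    memcpy(&f, &u, sizeof(f)); /* bit-cast without strict-aliasing UB */
    return f;
}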


@@ -27,16 +27,27 @@ enum gguf_tensor_type {
     GGUF_TYPE_Q5_1 = 7,
     GGUF_TYPE_Q8_0 = 8,
     GGUF_TYPE_Q8_1 = 9,
     // k-quantizations
     GGUF_TYPE_Q2_K = 10,
     GGUF_TYPE_Q3_K = 11,
     GGUF_TYPE_Q4_K = 12,
     GGUF_TYPE_Q5_K = 13,
     GGUF_TYPE_Q6_K = 14,
     GGUF_TYPE_Q8_K = 15,
-    GGUF_TYPE_I8,
-    GGUF_TYPE_I16,
-    GGUF_TYPE_I32,
+    GGUF_TYPE_IQ2_XXS = 16,
+    GGUF_TYPE_IQ2_XS = 17,
+    GGUF_TYPE_IQ3_XXS = 18,
+    GGUF_TYPE_IQ1_S = 19,
+    GGUF_TYPE_IQ4_NL = 20,
+    GGUF_TYPE_IQ3_S = 21,
+    GGUF_TYPE_IQ2_S = 22,
+    GGUF_TYPE_IQ4_XS = 23,
+    GGUF_TYPE_I8 = 24,
+    GGUF_TYPE_I16 = 25,
+    GGUF_TYPE_I32 = 26,
+    GGUF_TYPE_I64 = 27,
+    GGUF_TYPE_F64 = 28,
+    GGUF_TYPE_IQ1_M = 29,
+    GGUF_TYPE_BF16 = 30,
     GGUF_TYPE_COUNT,
 };
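Note that the integer types previously sat right after Q8_K with
implicit values (I8 was 16), so this hunk renumbers them to 24-26 to
make room for the IQ values used upstream; any switch over the enum
needs revisiting. A hypothetical lookup, not taken from the diff,
illustrating the new layout:

const char *tensor_type_name(enum gguf_tensor_type t) {
    switch (t) {
    case GGUF_TYPE_I8: return "i8";       /* now 24; was implicitly 16 */
    case GGUF_TYPE_I16: return "i16";     /* now 25 */
    case GGUF_TYPE_I32: return "i32";     /* now 26 */
    case GGUF_TYPE_I64: return "i64";     /* new */
    case GGUF_TYPE_F64: return "f64";     /* new */
    case GGUF_TYPE_IQ1_M: return "iq1_m"; /* new */
    case GGUF_TYPE_BF16: return "bf16";   /* new */
    default: return "unknown";
    }
}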
@@ -185,5 +196,6 @@
 uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
 void gguf_skip_key_values_section(gguf_ctx *ctx);
 float *gguf_tensor_to_float(gguf_tensor *tensor);
 int16_t *gguf_tensor_to_f16(gguf_tensor *tensor);
+int16_t *gguf_tensor_to_bf16(gguf_tensor *tensor);
 #endif
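A hedged usage sketch of the new prototype, assuming it mirrors
gguf_tensor_to_f16(): the caller receives a malloc()ed buffer of
tensor->num_weights bf16 values (held in int16_t storage), or NULL when
the source quantization, such as the IQ formats above, isn't handled:

#include <stdlib.h>
#include "gguflib.h" /* assumed name of the header shown above */

void process_as_bf16(gguf_tensor *tensor) {
    int16_t *bf16 = gguf_tensor_to_bf16(tensor);
    if (bf16 == NULL) return; /* e.g. an unsupported IQ quantization */
    /* ... consume tensor->num_weights bf16 values here ... */
    free(bf16);
}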