Clarify the need for FP16 implementation.

2025-12-16 00:18:52 +08:00 · 2023-12-27 18:54:36 +01:00
parent bd4ecbda94
commit 558c7c3c6d
2 changed files with 11 additions and 4 deletions
--- a/fp16.c
+++ b/fp16.c
@@ -1,11 +1,16 @@
-#include <stdint.h>
-#include <math.h>
-
-/* This code comes originally from:
+/* Conversion from floats to FP16 and the other way around.
+ * This is useful as in GGUF files we have both FP16 tensors
+ * and quantized blocks where half-precision floats are used
+ * to store the scaling factor (delta) and other parameters.
+ *
+ * This code comes originally from:
 * https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h
 *
 * The original code is MIT licensed. */

+#include <stdint.h>
+#include <math.h>
+
 static inline float fp32_from_bits(uint32_t w) {
    union {
        uint32_t as_bits;
--- a/fp16.h
+++ b/fp16.h
@@ -1,3 +1,5 @@
+/* See fp16.c. */
+
 #ifndef FP16_h
 #define FP16_h
 float from_half(uint16_t h);