From 558c7c3c6db2e755939245df574a7d3f1629d6e6 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Dec 2023 18:54:36 +0100 Subject: [PATCH] Clarify the need for FP16 implementation. --- fp16.c | 13 +++++++++---- fp16.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fp16.c b/fp16.c index 9a62719..fa90605 100644 --- a/fp16.c +++ b/fp16.c @@ -1,11 +1,16 @@ -#include -#include - -/* This code comes originally from: +/* Conversion from floats to FP16 and the other way around. + * This is useful as in GGUF files we have both FP16 tensors + * and quantized blocks where half-precision floats are used + * to store the scaling factor (delta) and other parameters. + * + * This code comes originally from: * https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h * * The original code is MIT licensed. */ +#include +#include + static inline float fp32_from_bits(uint32_t w) { union { uint32_t as_bits; diff --git a/fp16.h b/fp16.h index 32543b6..ad26ea7 100644 --- a/fp16.h +++ b/fp16.h @@ -1,3 +1,5 @@ +/* See fp16.c. */ + #ifdef FP16_h #define FP16_h float from_half(uint16_t h);