Clarify the need for FP16 implementation.

This commit is contained in:
antirez
2023-12-27 18:54:36 +01:00
parent bd4ecbda94
commit 558c7c3c6d
2 changed files with 11 additions and 4 deletions

13
fp16.c
View File

@@ -1,11 +1,16 @@
#include <stdint.h>
#include <math.h>
/* This code comes originally from:
/* Conversion from floats to FP16 and the other way around.
* This is needed because GGUF files contain both FP16 tensors
* and quantized blocks where half-precision floats are used
* to store the scaling factor (delta) and other parameters.
*
* This code comes originally from:
* https://github.com/Maratyszcza/FP16/blob/master/include/fp16/fp16.h
*
* The original code is MIT licensed. */
#include <stdint.h>
#include <math.h>
static inline float fp32_from_bits(uint32_t w) {
union {
uint32_t as_bits;

2
fp16.h
View File

@@ -1,3 +1,5 @@
/* See fp16.c. */
#ifdef FP16_h
#define FP16_h
float from_half(uint16_t h);