Mirror of https://github.com/antirez/gguf-tools.git (synced 2025-09-17 02:28:07 +08:00)
Quantization functions refactoring.
Changed files:
gguflib.c | 98
@@ -500,45 +500,34 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
 
 /* ============================ GGUF dequantization ========================= */
 
-/* Convert the specified tensor (quantized or not) into an array of
- * floats. The array is allocated with malloc(). If the tensor is already
- * in FP32 floats format, it is just memcpy()-ed to the destination array.
- *
- * On OOM, NULL is returned. If the tensor format is not yet supported,
- * NULL is returned as well, but errno is set to EINVAL. */
-float *gguf_tensor_to_float(gguf_tensor *tensor) {
+/* Q8_0 blocks dequantization to floats.
+ * 'y' is supposed to have enough space for 'count' weights. */
+void gguf_q8_0_to_float(void *weights_data, float *y, uint64_t count) {
     struct gguf_tensor_type_features *tf =
-        gguf_get_tensor_type_features(tensor->type);
-    uint64_t block_size = tf->bytes_per_block;
-    float *f = malloc(tensor->num_weights*sizeof(float));
-    if (tensor->type == GGUF_TYPE_F32) {
-        memcpy(f, tensor->weights_data, tensor->num_weights*sizeof(float));
-    } else if (tensor->type == GGUF_TYPE_F16) {
-        uint64_t i = 0; // i-th weight to dequantize.
-        uint16_t *w16 = (uint16_t*) tensor->weights_data;
-        while(i < tensor->num_weights) {
-            f[i] = from_half(w16[i]);
-            i++;
-        }
-    } else if (tensor->type == GGUF_TYPE_Q8_0) {
+        gguf_get_tensor_type_features(GGUF_TYPE_Q8_0);
     /* Very simple layout: |16 bit scale|32 x 8bit weights|
      * Each weight is scale * quantized_weight[0..31] */
-        int8_t *block = (int8_t*)tensor->weights_data;
+    int8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
-        while(i < tensor->num_weights) {
+    while(i < count) {
         /* For each block get the scale and convert all the
          * weights in the block. */
         float scale = from_half(*((uint16_t*)block));
         for (uint32_t j = 0; j < tf->items_per_block; j++) {
-            f[i++] = block[j+2] * scale; // j+2 to skip the scale bytes.
-            if (i == tensor->num_weights) break;
+            y[i++] = block[j+2] * scale; // j+2 to skip the scale bytes.
+            if (i == count) break;
         }
-        block += block_size; // Go to the next block.
+        block += tf->bytes_per_block; // Go to the next block.
     }
-    } else if (tensor->type == GGUF_TYPE_Q4_K) {
-        uint8_t *block = (uint8_t*)tensor->weights_data;
+}
+
+/* Q4_K blocks dequantization to floats.
+ * 'y' is supposed to have enough space for 'count' weights. */
+void gguf_q4_k_to_float(void *weights_data, float *y, uint64_t count) {
+    uint8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
-    while(i < tensor->num_weights) {
+    while(i < count) {
         /* Q4_K super-blocks have 256 total weights, split in 8 sub-blocks.
          * Each of the 8 sub-blocks has a different set of scales/mins, so
          * there are 16 total values for scales/mins, but the scales/mins
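To make the Q8_0 layout in this hunk concrete: each 34-byte block is a half-precision scale followed by 32 signed 8-bit weights, and every weight dequantizes as scale * q. Below is a minimal standalone sketch, not part of the commit; q8_0_block and half_to_float are illustrative stand-ins for the library's raw-byte walking and its from_half() helper.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative view of one Q8_0 block: |fp16 scale|32 x int8|.
     * The library instead walks raw bytes, using tf->items_per_block (32)
     * and tf->bytes_per_block (34) from the tensor type features table. */
    typedef struct {
        uint16_t d;     /* half-precision scale for the whole block */
        int8_t q[32];   /* quantized weights */
    } q8_0_block;

    /* Stand-in for the library's from_half(): IEEE 754 half to float. */
    static float half_to_float(uint16_t h) {
        uint32_t sign = (uint32_t)(h >> 15) << 31;
        uint32_t exp = (h >> 10) & 0x1F, man = h & 0x3FF;
        uint32_t bits; float f;
        if (exp == 0) { /* zero/subnormal: value is man * 2^-24 */
            f = man * (1.0f/16777216.0f);
            return (h >> 15) ? -f : f;
        }
        if (exp == 31) bits = sign | 0x7F800000u | (man << 13); /* inf/NaN */
        else bits = sign | ((exp + 112) << 23) | (man << 13);
        memcpy(&f, &bits, sizeof(f));
        return f;
    }

    /* Dequantize a single block: weight = scale * quantized value. */
    static void dequant_one_q8_0_block(const q8_0_block *b, float *out) {
        float scale = half_to_float(b->d);
        for (int j = 0; j < 32; j++) out[j] = scale * b->q[j];
    }

The early if (i == count) break; in the real loop guards the last block, which may be only partially used when the weight count is not a multiple of 32.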
@@ -600,22 +589,26 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
             /* First set: higher bits. */
             for (uint32_t j = 0; j < 32; j++) {
                 uint8_t w = block[j] & 0xf;
-                f[i++] = w * scale - min;
-                if (i == tensor->num_weights) return f;
+                y[i++] = w * scale - min;
+                if (i == count) return;
             }
             /* Second set: lower bits. */
             for (uint32_t j = 0; j < 32; j++) {
                 uint8_t w = block[j] >> 4;
-                f[i++] = w * scale - min;
-                if (i == tensor->num_weights) return f;
+                y[i++] = w * scale - min;
+                if (i == count) return;
             }
             block += 32; // Skip the two processed blocks.
         }
     }
-    } else if (tensor->type == GGUF_TYPE_Q6_K) {
-        uint8_t *block = (uint8_t*)tensor->weights_data;
+}
+
+/* Q6_K blocks dequantization to floats.
+ * 'y' is supposed to have enough space for 'count' weights. */
+void gguf_q6_k_to_float(void *weights_data, float *y, uint64_t count) {
+    uint8_t *block = weights_data;
     uint64_t i = 0; // i-th weight to dequantize.
-    while(i < tensor->num_weights) {
+    while(i < count) {
         /* Q6_K super-blocks have 256 total weights, split in 16 sub-blocks
          * of 16 elements. There are no mins, just scales. Each sub-block
          * has a block-specific scale quantized at 8 bits via a single
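The two j-loops in this hunk read the same 32 bytes twice: one pass takes each byte's low nibble (& 0xf), the other its high nibble (>> 4), and each 4-bit value maps to w * scale - min. A small sketch of just that nibble unpacking follows; a single already-decoded scale/min pair is assumed here purely for illustration, while the real code derives them from the super-block's packed scales/mins.

    #include <stdint.h>

    /* Unpack 64 Q4_K weights stored as 32 bytes of paired nibbles,
     * mirroring the two loops in the diff. 'scale' and 'min' are
     * assumed to be already decoded (an illustrative simplification). */
    static void dequant_q4_nibbles(const uint8_t *packed, float scale,
                                   float min, float *out) {
        for (int j = 0; j < 32; j++)
            out[j] = (packed[j] & 0xf) * scale - min;     /* low nibbles  */
        for (int j = 0; j < 32; j++)
            out[32+j] = (packed[j] >> 4) * scale - min;   /* high nibbles */
    }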
@@ -670,12 +663,12 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
         int8_t *scales = (int8_t*)block+128+64;
         for (int cluster = 0; cluster < 2; cluster++) {
             for (uint64_t j = 0; j < 128; j++) {
-                f[i] = (super_scale * scales[j/16]) *
+                y[i] = (super_scale * scales[j/16]) *
                        ((int8_t)
                         ((((L[j%64] >> (j/64*4)) & 0xF) |
                          (((H[j%32] >> (j/32*2)) & 3) << 4)))-32);
                 i++;
-                if (i == tensor->num_weights) return f;
+                if (i == count) return;
             }
             L += 64;
             H += 32;
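The dense expression above reassembles each 6-bit quantized value from two packed arrays: L supplies the lower 4 bits (two fields per byte) and H the upper 2 bits (four fields per byte), and the result is re-centered by subtracting 32 before being multiplied by super_scale * scales[j/16]. An equivalent sketch of just that bit surgery (the function name is illustrative):

    #include <stdint.h>

    /* Rebuild one 6-bit Q6_K weight from its packed halves.
     * lo_byte holds two 4-bit fields (lo_shift is 0 or 4);
     * hi_byte holds four 2-bit fields (hi_shift is 0, 2, 4 or 6).
     * Stored values are biased by +32, so subtracting 32 recovers a
     * signed weight in [-32, 31]. */
    static int8_t q6_k_weight(uint8_t lo_byte, int lo_shift,
                              uint8_t hi_byte, int hi_shift) {
        uint8_t lo = (lo_byte >> lo_shift) & 0xF;  /* lower 4 bits */
        uint8_t hi = (hi_byte >> hi_shift) & 3;    /* upper 2 bits */
        return (int8_t)((lo | (hi << 4)) - 32);
    }

In the diff, lo_shift is j/64*4 and hi_shift is j/32*2, which is how each L byte serves two weights and each H byte serves four.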
@@ -683,6 +676,37 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
         }
         block += 128+64+16+2; // Go to the next block.
     }
+}
+
+/* FP16 blocks dequantization to floats.
+ * 'y' is supposed to have enough space for 'count' weights. */
+void gguf_f16_to_float(void *weights_data, float *y, uint64_t count) {
+    uint64_t i = 0; // i-th weight to dequantize.
+    uint16_t *w16 = weights_data;
+    while(i < count) {
+        y[i] = from_half(w16[i]);
+        i++;
+    }
+}
+
+/* Convert the specified tensor (quantized or not) into an array of
+ * floats. The array is allocated with malloc(). If the tensor is already
+ * in FP32 floats format, it is just memcpy()-ed to the destination array.
+ *
+ * On OOM, NULL is returned. If the tensor format is not yet supported,
+ * NULL is returned as well, but errno is set to EINVAL. */
+float *gguf_tensor_to_float(gguf_tensor *tensor) {
+    float *f = malloc(tensor->num_weights*sizeof(float));
+    if (tensor->type == GGUF_TYPE_F32) {
+        memcpy(f, tensor->weights_data, tensor->num_weights*sizeof(float));
+    } else if (tensor->type == GGUF_TYPE_F16) {
+        gguf_f16_to_float(tensor->weights_data, f, tensor->num_weights);
+    } else if (tensor->type == GGUF_TYPE_Q8_0) {
+        gguf_q8_0_to_float(tensor->weights_data, f, tensor->num_weights);
+    } else if (tensor->type == GGUF_TYPE_Q4_K) {
+        gguf_q4_k_to_float(tensor->weights_data, f, tensor->num_weights);
+    } else if (tensor->type == GGUF_TYPE_Q6_K) {
+        gguf_q6_k_to_float(tensor->weights_data, f, tensor->num_weights);
     } else {
         errno = EINVAL;
         return NULL;
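After this refactoring, gguf_tensor_to_float() is a thin dispatcher over the per-format helpers, and its contract is unchanged: a malloc()ed float array on success, NULL on OOM, and NULL with errno set to EINVAL for unsupported formats. A hedged usage sketch follows; how 'tensor' is obtained is assumed, see gguflib.h for the actual iteration API.

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "gguflib.h"

    /* Dump the first few weights of a tensor, illustrating the
     * error-handling contract of gguf_tensor_to_float(). */
    static void print_first_weights(gguf_tensor *tensor) {
        float *f = gguf_tensor_to_float(tensor);
        if (f == NULL) {
            fprintf(stderr, errno == EINVAL ?
                "tensor format not yet supported\n" : "out of memory\n");
            return;
        }
        for (uint64_t j = 0; j < 4 && j < tensor->num_weights; j++)
            printf("weight[%llu] = %f\n", (unsigned long long)j, f[j]);
        free(f); /* the array is malloc()ed: the caller owns it */
    }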