mirror of
https://github.com/antirez/gguf-tools.git
synced 2025-09-16 17:48:08 +08:00
Add support for q4_0 and q4_1 quantizations
This commit is contained in:
91
gguflib.c
91
gguflib.c
@@ -778,6 +778,89 @@ void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo
|
||||
}
|
||||
}
|
||||
|
||||
/* Q4_0 blocks dequantization to floats.
|
||||
* 'dst' is supposed to have enough space for 'count' weights. */
|
||||
void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
|
||||
float *f = dst;
|
||||
struct gguf_tensor_type_features *tf =
|
||||
gguf_get_tensor_type_features(GGUF_TYPE_Q4_0);
|
||||
|
||||
/* Very simple layout: |16 bit scale|32 x 4bit weights|
|
||||
* Each weight is scale * (quantized_weight[0..31] - 8) */
|
||||
uint8_t *block = weights_data;
|
||||
uint64_t i = 0; // i-th weight to dequantize.
|
||||
while(i < count) {
|
||||
/* For each block get the scale and convert all the
|
||||
* weights in the block. */
|
||||
float scale = from_half(*((uint16_t*)block));
|
||||
/* First 16 weights are in the lower bits */
|
||||
for (uint32_t j = 0; j < 16; j++) {
|
||||
uint8_t value = block[j+2]; // j+2 to skip the scale bytes.
|
||||
value &= 0xf; // lower bits
|
||||
float weight = ((int8_t) value - 8) * scale;
|
||||
if (store_callback)
|
||||
store_callback(dst,i,weight);
|
||||
else
|
||||
f[i] = weight;
|
||||
if (++i == count) break;
|
||||
}
|
||||
/* Last 16 weights are in the higher bits */
|
||||
for (uint32_t j = 0; j < 16; j++) {
|
||||
uint8_t value = block[j+2]; // j+2 to skip the scale bytes.
|
||||
value >>= 4; // higher bits
|
||||
float weight = ((int8_t) value - 8) * scale;
|
||||
if (store_callback)
|
||||
store_callback(dst,i,weight);
|
||||
else
|
||||
f[i] = weight;
|
||||
if (++i == count) break;
|
||||
}
|
||||
block += tf->bytes_per_block; // Go to the next block.
|
||||
}
|
||||
}
|
||||
|
||||
/* Q4_1 blocks dequantization to floats.
|
||||
* 'dst' is supposed to have enough space for 'count' weights. */
|
||||
void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) {
|
||||
float *f = dst;
|
||||
struct gguf_tensor_type_features *tf =
|
||||
gguf_get_tensor_type_features(GGUF_TYPE_Q4_1);
|
||||
|
||||
/* Very simple layout: |16 bit scale|16 bit bias|32 x 4bit weights|
|
||||
* Each weight is scale * quantized_weight[0..31] + bias */
|
||||
uint8_t *block = weights_data;
|
||||
uint64_t i = 0; // i-th weight to dequantize.
|
||||
while(i < count) {
|
||||
/* For each block get the scale and convert all the
|
||||
* weights in the block. */
|
||||
float scale = from_half(*((uint16_t*)block));
|
||||
float bias = from_half(*((uint16_t*)block+2));
|
||||
/* First 16 weights are in the lower bits */
|
||||
for (uint32_t j = 0; j < 16; j++) {
|
||||
uint8_t value = block[j+4]; // j+2 to skip the scale and bias bytes.
|
||||
value &= 0xf; // lower bits
|
||||
float weight = value * scale + bias;
|
||||
if (store_callback)
|
||||
store_callback(dst,i,weight);
|
||||
else
|
||||
f[i] = weight;
|
||||
if (++i == count) break;
|
||||
}
|
||||
/* Last 16 weights are in the higher bits */
|
||||
for (uint32_t j = 0; j < 16; j++) {
|
||||
uint8_t value = block[j+4]; // j+2 to skip the scale and bias bytes.
|
||||
value >>= 4; // higher bits
|
||||
float weight = value * scale + bias;
|
||||
if (store_callback)
|
||||
store_callback(dst,i,weight);
|
||||
else
|
||||
f[i] = weight;
|
||||
if (++i == count) break;
|
||||
}
|
||||
block += tf->bytes_per_block; // Go to the next block.
|
||||
}
|
||||
}
|
||||
|
||||
/* FP16 blocks dequantization to floats.
|
||||
* 'y' is supposed to have enough space for 'count' weights. */
|
||||
void gguf_f16_to_float(void *weights_data, float *dst, uint64_t count, store_float_callback store_callback) {
|
||||
@@ -814,6 +897,10 @@ float *gguf_tensor_to_float(gguf_tensor *tensor) {
|
||||
gguf_q6_k_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
|
||||
} else if (tensor->type == GGUF_TYPE_Q2_K) {
|
||||
gguf_q2_k_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
|
||||
} else if (tensor->type == GGUF_TYPE_Q4_0) {
|
||||
gguf_q4_0_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
|
||||
} else if (tensor->type == GGUF_TYPE_Q4_1) {
|
||||
gguf_q4_1_to_float(tensor->weights_data, f, tensor->num_weights, NULL);
|
||||
} else {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
@@ -839,6 +926,10 @@ int16_t *gguf_tensor_to_f16(gguf_tensor *tensor) {
|
||||
gguf_q6_k_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
|
||||
} else if (tensor->type == GGUF_TYPE_Q2_K) {
|
||||
gguf_q2_k_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
|
||||
} else if (tensor->type == GGUF_TYPE_Q4_0) {
|
||||
gguf_q4_0_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
|
||||
} else if (tensor->type == GGUF_TYPE_Q4_1) {
|
||||
gguf_q4_1_to_float(tensor->weights_data, f16, tensor->num_weights, gguf_store_f16_callback);
|
||||
} else {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
|
Reference in New Issue
Block a user