diff --git a/fp16.h b/fp16.h index ad26ea7..10d2d82 100644 --- a/fp16.h +++ b/fp16.h @@ -1,6 +1,6 @@ /* See fp16.c. */ -#ifdef FP16_h +#ifndef FP16_h #define FP16_h float from_half(uint16_t h); uint16_t to_half(float f); diff --git a/gguf-tools.c b/gguf-tools.c index 0b0521d..5f6872c 100644 --- a/gguf-tools.c +++ b/gguf-tools.c @@ -3,9 +3,11 @@ #include #include #include +#include #include "gguflib.h" #include "sds.h" +#include "fp16.h" /* ========================== Utility functions ============================ */ @@ -310,15 +312,63 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con exit(0); } +void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) { + gguf_ctx *ctx = gguf_init(filename); + if (ctx == NULL) { + perror("Opening GGUF file"); + exit(1); + } + + /* Skip all the key-value pairs. */ + gguf_skip_key_values_section(ctx); + + /* Look for the tensor with the specified name. */ + size_t tnamelen = strlen(tname); + gguf_tensor tensor; + while (gguf_get_tensor(ctx,&tensor)) { + if (tensor.namelen != tnamelen || + memcmp(tensor.name,tname,tnamelen)) continue; + break; // Matching tensor found! 
+ } + + if (tensor.name == NULL) { + fprintf(stderr, "A tensor with the specified name was not found\n"); + exit(1); + } + + float *weights = gguf_tensor_to_float(&tensor); + if (weights == NULL) { + if (errno == EINVAL) { + fprintf(stderr,"Unsupported tensor type: %s\n", + gguf_get_tensor_type_name(tensor.type)); + } else { + fprintf(stderr,"Out of memory\n"); + } + exit(1); + } + + uint64_t j = 0; + while (j < tensor.num_weights) { + printf("%f, ", weights[j]); + j++; + if (j % 4 == 0) printf("\n"); + if (j == count) break; + } + if (j % 4 != 0) printf("\n"); + free(weights); + return; +} + /* ======================= Main and CLI options parsing ===================== */ void gguf_tools_usage(const char *progname) { printf("Usage: %s [options...]\n" - "Subcommands:\n" - " show -- show GGUF model keys and tensors.\n" - " split-mixtral mixtral.gguf out.gguf -- extract expert.\n" - "Example:\n" - " split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n" +"Subcommands:\n" +" show -- show GGUF model keys and tensors.\n" +" inspect-tensor [count] -- show tensor weights.\n" +" split-mixtral mixtral.gguf out.gguf -- extract expert.\n" +"Example:\n" +" split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n" , progname); exit(1); } @@ -328,6 +378,9 @@ int main(int argc, char **argv) { if (!strcmp(argv[1],"show") && argc == 3) { gguf_tools_show(argv[2]); + } else if (!strcmp(argv[1],"inspect-tensor") && (argc == 4 || argc == 5)) { + gguf_tools_inspect_weights(argv[2],argv[3], + argc == 5 ? 
atoi(argv[4]) : 0); } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) { int experts[32]; size_t elen = strlen(argv[2]); diff --git a/gguflib.c b/gguflib.c index c409aee..396a078 100644 --- a/gguflib.c +++ b/gguflib.c @@ -10,6 +10,7 @@ #include #include "gguflib.h" +#include "fp16.h" /* ============================ Low level functions ========================= */ @@ -186,6 +187,14 @@ int gguf_get_key(gguf_ctx *ctx, gguf_key *key) { return 1; } +/* Skip all the key-value pairs in the GGUF file to get to the + * tensors information segment. */ +void gguf_skip_key_values_section(gguf_ctx *ctx) { + gguf_key key; + while (gguf_get_key(ctx,&key)) + gguf_do_with_value(ctx,key.type,key.val,NULL,0,0,NULL); +} + /* Given an offset or a length, returns the padding needed to align it * to ctx->alignment. */ uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) { @@ -219,14 +228,22 @@ void gguf_set_data_offset(gguf_ctx *ctx) { * there are still key-value pairs to process before getting into the * tensors section. * - * When 0 is returned, we are at the end of the file and as a side - * effect this function will set the data offset ctx->data_off. */ + * The first time this function is called, as a side effect it will + * set ctx->data_off to return tensors with absolute offsets. + * + * When 0 is returned, the tensor name is set to NULL, so that after + * a while() loop scanning tensors for a given condition, the caller + * can easily understand if the search terminated because the loop + * was exited or because all the entries were consumed. */ int gguf_get_tensor(gguf_ctx *ctx, gguf_tensor *tensor) { - if (ctx->left_tensors == 0 || ctx->left_kv != 0) return 0; + if (ctx->left_tensors == 0 || ctx->left_kv != 0) { + tensor->name = NULL; + return 0; + } /* We want to return tensor data with offsets relative to the start * of the file, so that the user of the API is able to access tensors - * as it iterates over them. 
To do so, we need to perform a fulls + * as it iterates over them. To do so, we need to perform a full * scan if this is the first tensor info we are reading. */ if (ctx->data_off == 0) gguf_set_data_offset(ctx); @@ -480,3 +497,36 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) { gguf_remap(ctx); return 1; } + +/* ============================ GGUF dequantization ========================= */ + +/* Convert the specified tensor (quantized or not) into an array of + * floats. The array is allocated with malloc(). Currently only the + * Q8_0 quantization format is supported as input. + * + * On OOM, NULL is returned. If the tensor format is not yet supported, + * NULL is returned as well, but errno is set to EINVAL. */ +float *gguf_tensor_to_float(gguf_tensor *tensor) { + struct gguf_tensor_type_features *tf = + gguf_get_tensor_type_features(tensor->type); + uint64_t block_size = tf->bytes_per_block; + float *f = malloc(tensor->num_weights*sizeof(float)); + if (tensor->type == GUFF_TYPE_Q8_0) { + int8_t *block = (int8_t*)tensor->weights_data; + uint64_t i = 0; + while(i < tensor->num_weights) { + /* For each block get the delta and convert all the + * weights in the block. */ + float delta = from_half(*((uint16_t*)block)); + for (uint32_t j = 0; j < tf->items_per_block; j++) { + f[i++] = block[j+2] * delta; // j+2 to skip the delta bytes. + if (i == tensor->num_weights) break; + } + block += block_size; // Go to the next block. 
+ } + } else { + errno = EINVAL; + return NULL; + } + return f; +} diff --git a/gguflib.h b/gguflib.h index 09f0998..815ae88 100644 --- a/gguflib.h +++ b/gguflib.h @@ -178,5 +178,7 @@ int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset); int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size); uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset); +void gguf_skip_key_values_section(gguf_ctx *ctx); +float *gguf_tensor_to_float(gguf_tensor *tensor); #endif