diff --git a/gguf-tools.c b/gguf-tools.c
index 91dfd44..aa012f9 100644
--- a/gguf-tools.c
+++ b/gguf-tools.c
@@ -2,7 +2,10 @@
 #include
 #include
 #include
+#include <assert.h>
+
 #include "gguflib.h"
+#include "sds.h"
 
 /* ========================== Utility functions ============================ */
 
@@ -201,6 +204,93 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
         if (!skip)
             gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
     }
+
+    /* Now it's time to copy the tensors. We need to copy all the shared
+     * tensors (between the different experts), but only a set of
+     * expert-specific tensors corresponding to the expert ID the user
+     * wants to extract. */
+    struct tensor_to_copy {
+        sds dest_name;          // Tensor name in the output file.
+        gguf_tensor orig_info;  // Original tensor info.
+        uint64_t dest_offset;   // Destination offset in output file.
+        uint64_t size;          // Tensor total bytes.
+    };
+
+    uint32_t num_tensors = 0;
+    uint32_t max_tensors = 2048;
+
+    struct tensor_to_copy *tensors =
+        malloc(sizeof(struct tensor_to_copy)*max_tensors);
+    if (tensors == NULL) {
+        perror("Allocating tensors info array");
+        exit(1);
+    }
+
+    /* Scan Mixtral tensors looking for the ones we need to copy
+     * in the output model. */
+    gguf_tensor tensor_info;
+    while (gguf_get_tensor(mixtral,&tensor_info)) {
+        assert(num_tensors < max_tensors);
+
+        char tn[1024]; // Tensor name as null-terminated string.
+        snprintf(tn,sizeof(tn),"%.*s",(int)tensor_info.namelen, tensor_info.name);
+
+        /* Is this a feed-forward tensor? We want to copy only
+         * the ones of our expert ID. */
+        if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
+            char match[32];
+            snprintf(match,sizeof(match),".%d.weight",expert_id);
+            char *match_ptr = strstr(tn,match);
+            if (match_ptr == NULL) {
+                printf("Skipping tensor %s\n", tn);
+                continue; // Skip this tensor.
+            }
+
+            /* We need to remove the ".<expert_id>" part (two bytes) from the name. */
+            size_t taillen = strlen(match_ptr);
+            memmove(match_ptr,match_ptr+2,taillen-1);
+        }
+
+        /* Create the entry for this tensor. Later we will scan all our
+         * entries and append data to our output tensor. */
+        tensors[num_tensors].dest_name = sdsnew(tn);
+        if (tensors[num_tensors].dest_name == NULL) {
+            perror("Allocating dest tensor name");
+            exit(1);
+        }
+        tensors[num_tensors].orig_info = tensor_info;
+        tensors[num_tensors].size = tensor_info.bsize;
+        num_tensors++;
+    }
+
+    /* Now we need to set the offset for our destination tensors. As
+     * we calculate the offsets, we can emit the tensors information
+     * section as well. */
+    uint64_t tensor_off = 0;    // Tensor offsets are relative to the data section,
+                                // so we start at offset 0.
+    for (uint32_t j = 0; j < num_tensors; j++) {
+        /* Align offset. */
+        tensor_off += gguf_get_alignment_padding(mixtral->alignment,tensor_off);
+        tensors[j].dest_offset = tensor_off;
+        if (gguf_append_tensor_info(output,tensors[j].dest_name,strlen(tensors[j].dest_name),tensors[j].orig_info.ndim,tensors[j].orig_info.dim,tensors[j].orig_info.type,tensor_off) == 0)
+        {
+            perror("Failed to append tensor info");
+            exit(1);
+        }
+        tensor_off += tensors[j].orig_info.bsize;
+    }
+    printf("Output file: after writing tensors info, file size is: %llu\n", (unsigned long long)output->size);
+
+    /* Finally, append the tensors weights. */
+    for (uint32_t j = 0; j < num_tensors; j++) {
+        printf("Writing tensor %s\n", tensors[j].dest_name);
+        if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
+                                    tensors[j].orig_info.bsize) == 0)
+        {
+            perror("Failed to append tensor data");
+            exit(1);
+        }
+    }
     exit(0);
 }
 
diff --git a/gguflib.c b/gguflib.c
index 6cf5b83..c409aee 100644
--- a/gguflib.c
+++ b/gguflib.c
@@ -451,7 +451,7 @@ int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t
 
 /* Append tensor metadata (but not the actual tensor weights data) to the
  * GGUF file identified by 'ctx'. */
-int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset)
+int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset)
 {
     if (write(ctx->fd,&namelen,sizeof(namelen)) != sizeof(namelen)) return 0;
     if (write(ctx->fd,tensorname,namelen) != (ssize_t)namelen) return 0;
@@ -467,32 +467,14 @@ int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen,
     return 1;
 }
 
-/* Append tensor data enforcing the GGUF file aligment. The user must specify
- * an offset that requires no more than ctx.alignemnt-1 padding bytes starting
- * from the current offset (this means that this function should be called
- * sequentially for all the tensors we want to store, after we already
- * computed the right offset for all the tensors). Also the offset must be
- * aligned. Otherwise the function will fail returning 0. On success, 1 is
- * returned. The function will take care to add the padding required to
- * start writing the tensor at the specified offset. */
-int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size) {
+/* Append tensor data enforcing the GGUF file alignment.
+ * The function will take care of adding the padding required to start
+ * writing the tensor at an alignment multiple. */
+int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
     char padding_data[1024] = {0};
     assert(sizeof(padding_data) >= ctx->alignment);
 
-    /* Is the offset aligned? */
-    if (offset % ctx->alignment) {
-        errno = EINVAL;
-        return 0;
-    }
-
-    /* We expect the offset of the context to be already where this tensor
-     * should be stored, minus the padding. */
-    if (offset < ctx->off || offset - ctx->off >= ctx->alignment) {
-        errno = EINVAL;
-        return 0;
-    }
-
-    uint64_t padding = gguf_get_alignment_padding(ctx->alignment,offset);
+    uint64_t padding = gguf_get_alignment_padding(ctx->alignment,ctx->size);
     if (write(ctx->fd,padding_data,padding) != (ssize_t)padding) return 0;
     if (write(ctx->fd,tensor,tensor_size) != (ssize_t)tensor_size) return 0;
     gguf_remap(ctx);
diff --git a/gguflib.h b/gguflib.h
index 173df19..cf10367 100644
--- a/gguflib.h
+++ b/gguflib.h
@@ -174,8 +174,8 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                                         uint64_t array_len));
 void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
 int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
-int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
-int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size);
+int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
+int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
 uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
 
 #endif
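Usage note (not part of the patch): with the rename to gguf_append_tensor_info() and the removal of the explicit offset argument from gguf_append_tensor_data(), writing tensors becomes a two-pass protocol, exactly as gguf_tools_split_mixtral() does above: first compute data-section-relative offsets with gguf_get_alignment_padding() and emit the info records, then append the payloads in the same order. Since gguf_append_tensor_data() now pads from the current file size (ctx->size), the first payload starts on an alignment boundary (where readers expect the data section to begin) and each later payload lands at the relative offset declared in the first pass. Below is a minimal sketch of the pattern, assuming a gguf_ctx whose header and key/value section have already been written, and an array of gguf_tensor entries filled by gguf_get_tensor() on a still-mapped source file; the helper name write_tensors is made up for illustration.

#include <stdint.h>
#include "gguflib.h"

/* Two-pass tensor writing against the new API (sketch). Returns 1 on
 * success, 0 on error, following the library's convention. */
int write_tensors(gguf_ctx *out, gguf_tensor *t, uint32_t count) {
    uint64_t off = 0; /* Offsets are relative to the start of the data section. */

    /* Pass 1: append the tensor-info records with precomputed, aligned offsets. */
    for (uint32_t j = 0; j < count; j++) {
        off += gguf_get_alignment_padding(out->alignment, off);
        if (gguf_append_tensor_info(out, t[j].name, t[j].namelen,
                                    t[j].ndim, t[j].dim, t[j].type, off) == 0)
            return 0;
        off += t[j].bsize;
    }

    /* Pass 2: append the payloads in the same order; the library pads each
     * one so it starts at an alignment multiple, matching the offsets above. */
    for (uint32_t j = 0; j < count; j++) {
        if (gguf_append_tensor_data(out, t[j].weights_data, t[j].bsize) == 0)
            return 0;
    }
    return 1;
}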
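On the padding helper itself: both call sites use gguf_get_alignment_padding(alignment, offset) as "number of zero bytes needed to round offset up to the next multiple of alignment", and the removed doc comment above states the padding never exceeds alignment-1 bytes, so an already-aligned offset needs no padding. A small sanity check of that rule, assuming the usual GGUF alignment of 32 bytes:

#include <assert.h>
#include "gguflib.h"

int main(void) {
    /* Rounding up to multiples of 32: aligned offsets need 0 padding bytes,
     * everything else needs (32 - offset % 32) bytes. */
    assert(gguf_get_alignment_padding(32, 0) == 0);
    assert(gguf_get_alignment_padding(32, 100) == 28);  /* 100 + 28 = 128 */
    assert(gguf_get_alignment_padding(32, 128) == 0);
    return 0;
}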