From 3081d69b8eaf28df032941253dc4bc356bf37d8a Mon Sep 17 00:00:00 2001
From: antirez
Date: Tue, 26 Dec 2023 09:14:50 +0100
Subject: [PATCH] split-mixtral: copying of keys + APIs needed.

---
 gguf-tools.c | 41 +++++++++++++++++++++++++++++++++++++++++
 gguflib.c    | 43 +++++++++++++++++++++++++++++++++++++++----
 gguflib.h    |  5 +++++
 3 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/gguf-tools.c b/gguf-tools.c
index 8037513..91dfd44 100644
--- a/gguf-tools.c
+++ b/gguf-tools.c
@@ -165,6 +165,45 @@ void gguf_tools_show(const char *filename) {
     return;
 }
 
+/* Reads a Mixtral MoE model and creates a new non-MoE GGUF file based
+ * on the weights of the expert with id 'expert_id'. */
+void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) {
+    gguf_ctx *mixtral = gguf_init(mixtral_filename);
+    if (mixtral == NULL) {
+        perror("Opening Mixtral file");
+        exit(1);
+    }
+
+    gguf_ctx *output = gguf_create(output_filename);
+    if (output == NULL) {
+        perror("Opening the output file");
+        exit(1);
+    }
+
+    /* To start, copy all the key value items, excluding the ones
+     * related to the experts. */
+    gguf_key key;
+    while (gguf_get_key(mixtral,&key)) {
+        char keybuf[1024];
+        snprintf(keybuf,sizeof(keybuf),"%.*s",(int)key.namelen, key.name);
+
+        int skip = strstr(keybuf,"llama.expert_") != NULL;
+
+        if (!skip)
+            printf("Copying %s\n", keybuf);
+        uint64_t value_start_offset = mixtral->off;
+        void *value = mixtral->data+mixtral->off;
+        // Just consume the value without doing anything with it.
+        gguf_do_with_value(mixtral,key.type,key.val,NULL,0,0,NULL);
+        uint64_t value_len = mixtral->off - value_start_offset;
+
+        // Now append the value to the output model.
+        if (!skip)
+            gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
+    }
+    exit(0);
+}
+
 /* ======================= Main and CLI options parsing ===================== */
 
 void gguf_tools_usage(const char *progname) {
@@ -181,6 +220,8 @@ int main(int argc, char **argv) {
 
     if (!strcmp(argv[1],"show") && argc == 3) {
         gguf_tools_show(argv[2]);
+    } else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
+        gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]);
     } else {
         gguf_tools_usage(argv[0]);
     }
diff --git a/gguflib.c b/gguflib.c
index 4c1deda..5216b2a 100644
--- a/gguflib.c
+++ b/gguflib.c
@@ -306,16 +306,19 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
         len = val->array.len;
         //exit(1);
         ctx->off += 4+8; // Skip elements type / array length.
-        callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
         for (uint64_t j = 0; j < len; j++) {
             val = (union gguf_value*)(ctx->data+ctx->off);
             gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
             /* As a side effect of calling gguf_do_with_value() ctx->off
              * will be update, so 'val' will be set to the next element. */
         }
-        callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
+        if (callback)
+            callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
     } else {
-        callback(privdata,type,val,in_array,array_len);
+        if (callback)
+            callback(privdata,type,val,in_array,array_len);
         ctx->off += gguf_value_len(type,val);
     }
 }
@@ -397,7 +400,7 @@ void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int f
  *
  * On success the context with the file already loaded is returned,
  * otherwise NULL is returned. */
-gguf_ctx *guff_create(const char *filename) {
+gguf_ctx *gguf_create(const char *filename) {
     struct gguf_header hdr;
     memcpy(&hdr.magic,"GGUF",4);
     hdr.version = 3;
@@ -456,3 +459,35 @@ int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen,
     ctx->header->tensor_count++;
     return 1;
 }
+
+/* Append tensor data enforcing the GGUF file alignment. The user must specify
+ * an offset that requires no more than ctx->alignment-1 padding bytes starting
+ * from the current offset (this means that this function should be called
+ * sequentially for all the tensors we want to store, after we already
+ * computed the right offset for all the tensors). Also the offset must be
+ * aligned, otherwise the function fails returning 0 with errno set to EINVAL
+ * (0 is also returned if a write fails). On success 1 is returned. The needed
+ * padding is added so that the tensor starts at the specified offset. */
+int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size) {
+    char padding_data[1024] = {0};
+    assert(sizeof(padding_data) >= ctx->alignment);
+
+    /* Is the offset aligned? */
+    if (offset % ctx->alignment) {
+        errno = EINVAL;
+        return 0;
+    }
+
+    /* We expect the offset of the context to be already where this tensor
+     * should be stored, minus the padding. */
+    if (offset < ctx->off || offset - ctx->off >= ctx->alignment) {
+        errno = EINVAL;
+        return 0;
+    }
+
+    uint64_t padding = offset - ctx->off; /* Checked above: 0..alignment-1. */
+    if (write(ctx->fd,padding_data,padding) != (ssize_t)padding) return 0;
+    if (write(ctx->fd,tensor,tensor_size) != (ssize_t)tensor_size) return 0;
+    gguf_remap(ctx);
+    return 1;
+}
diff --git a/gguflib.h b/gguflib.h
index 07cb93e..173df19 100644
--- a/gguflib.h
+++ b/gguflib.h
@@ -159,6 +159,7 @@ typedef struct {
 /* =============================== Prototypes =============================== */
 
 gguf_ctx *gguf_init(const char *filename);
+gguf_ctx *gguf_create(const char *filename);
 int gguf_remap(gguf_ctx *ctx);
 void gguf_rewind(gguf_ctx *ctx);
 void gguf_end(gguf_ctx *ctx);
@@ -172,5 +173,9 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
                                      union gguf_value *val, uint64_t in_array,
                                      uint64_t array_len));
 void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
+int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
+int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
+int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size);
+uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
 
 #endif