mirror of
https://github.com/antirez/gguf-tools.git
synced 2025-09-18 04:08:09 +08:00
split-mixtral: copying of keys + APIs needed.
This commit is contained in:
41
gguf-tools.c
41
gguf-tools.c
@@ -165,6 +165,45 @@ void gguf_tools_show(const char *filename) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Read a Mixtral MoE model and creates a new non-MoE GGUF file based
|
||||||
|
* on the weights of the expert with id 'expert_id'. */
|
||||||
|
void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) {
|
||||||
|
gguf_ctx *mixtral = gguf_init(mixtral_filename);
|
||||||
|
if (mixtral == NULL) {
|
||||||
|
perror("Opening Mixtral file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
gguf_ctx *output = gguf_create(output_filename);
|
||||||
|
if (output == NULL) {
|
||||||
|
perror("Opening the output file");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* To start, copy all the key value items, excluding the one
|
||||||
|
* related to the experts. */
|
||||||
|
gguf_key key;
|
||||||
|
while (gguf_get_key(mixtral,&key)) {
|
||||||
|
char keybuf[1024];
|
||||||
|
snprintf(keybuf,sizeof(keybuf),"%.*s",(int)key.namelen, key.name);
|
||||||
|
|
||||||
|
int skip = strstr(keybuf,"llama.expert_") != NULL;
|
||||||
|
|
||||||
|
if (!skip)
|
||||||
|
printf("Copying %s\n", keybuf);
|
||||||
|
uint64_t value_start_offset = mixtral->off;
|
||||||
|
void *value = mixtral->data+mixtral->off;
|
||||||
|
// Just consume the value without doing anything with it.
|
||||||
|
gguf_do_with_value(mixtral,key.type,key.val,NULL,0,0,NULL);
|
||||||
|
uint64_t value_len = mixtral->off - value_start_offset;
|
||||||
|
|
||||||
|
// Now append the value to the output model.
|
||||||
|
if (!skip)
|
||||||
|
gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
/* ======================= Main and CLI options parsing ===================== */
|
/* ======================= Main and CLI options parsing ===================== */
|
||||||
|
|
||||||
void gguf_tools_usage(const char *progname) {
|
void gguf_tools_usage(const char *progname) {
|
||||||
@@ -181,6 +220,8 @@ int main(int argc, char **argv) {
|
|||||||
|
|
||||||
if (!strcmp(argv[1],"show") && argc == 3) {
|
if (!strcmp(argv[1],"show") && argc == 3) {
|
||||||
gguf_tools_show(argv[2]);
|
gguf_tools_show(argv[2]);
|
||||||
|
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
|
||||||
|
gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]);
|
||||||
} else {
|
} else {
|
||||||
gguf_tools_usage(argv[0]);
|
gguf_tools_usage(argv[0]);
|
||||||
}
|
}
|
||||||
|
43
gguflib.c
43
gguflib.c
@@ -306,16 +306,19 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
|
|||||||
len = val->array.len;
|
len = val->array.len;
|
||||||
//exit(1);
|
//exit(1);
|
||||||
ctx->off += 4+8; // Skip elements type / array length.
|
ctx->off += 4+8; // Skip elements type / array length.
|
||||||
callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
|
if (callback)
|
||||||
|
callback(privdata,GGUF_VALUE_TYPE_ARRAY_START,val,in_array,len);
|
||||||
for (uint64_t j = 0; j < len; j++) {
|
for (uint64_t j = 0; j < len; j++) {
|
||||||
val = (union gguf_value*)(ctx->data+ctx->off);
|
val = (union gguf_value*)(ctx->data+ctx->off);
|
||||||
gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
|
gguf_do_with_value(ctx,etype,val,privdata,j+1,len,callback);
|
||||||
/* As a side effect of calling gguf_do_with_value() ctx->off
|
/* As a side effect of calling gguf_do_with_value() ctx->off
|
||||||
* will be updated, so 'val' will be set to the next element. */
|
* will be updated, so 'val' will be set to the next element. */
|
||||||
}
|
}
|
||||||
callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
|
if (callback)
|
||||||
|
callback(privdata,GGUF_VALUE_TYPE_ARRAY_END,NULL,in_array,len);
|
||||||
} else {
|
} else {
|
||||||
callback(privdata,type,val,in_array,array_len);
|
if (callback)
|
||||||
|
callback(privdata,type,val,in_array,array_len);
|
||||||
ctx->off += gguf_value_len(type,val);
|
ctx->off += gguf_value_len(type,val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -397,7 +400,7 @@ void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int f
|
|||||||
*
|
*
|
||||||
* On success the context with the file already loaded is returned,
|
* On success the context with the file already loaded is returned,
|
||||||
* otherwise NULL is returned. */
|
* otherwise NULL is returned. */
|
||||||
gguf_ctx *guff_create(const char *filename) {
|
gguf_ctx *gguf_create(const char *filename) {
|
||||||
struct gguf_header hdr;
|
struct gguf_header hdr;
|
||||||
memcpy(&hdr.magic,"GGUF",4);
|
memcpy(&hdr.magic,"GGUF",4);
|
||||||
hdr.version = 3;
|
hdr.version = 3;
|
||||||
@@ -456,3 +459,35 @@ int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen,
|
|||||||
ctx->header->tensor_count++;
|
ctx->header->tensor_count++;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Append tensor data enforcing the GGUF file aligment. The user must specify
|
||||||
|
* an offset that requires no more than ctx.alignemnt-1 padding bytes starting
|
||||||
|
* from the current offset (this means that this function should be called
|
||||||
|
* sequentially for all the tensors we want to store, after we already
|
||||||
|
* computed the right offset for all the tensors). Also the offset must be
|
||||||
|
* aligned. Otherwise the function will fail returning 0. On success, 1 is
|
||||||
|
* returned. The function will take care to add the padding required to
|
||||||
|
* start writing the tensor at the specified offset. */
|
||||||
|
int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size) {
|
||||||
|
char padding_data[1024] = {0};
|
||||||
|
assert(sizeof(padding_data) >= ctx->alignment);
|
||||||
|
|
||||||
|
/* Is the offset aligned? */
|
||||||
|
if (offset % ctx->alignment) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We expect the offset of the context to be already where this tensor
|
||||||
|
* should be stored, minus the padding. */
|
||||||
|
if (offset < ctx->off || offset - ctx->off >= ctx->alignment) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t padding = gguf_get_alignment_padding(ctx->alignment,offset);
|
||||||
|
if (write(ctx->fd,padding_data,padding) != (ssize_t)padding) return 0;
|
||||||
|
if (write(ctx->fd,tensor,tensor_size) != (ssize_t)tensor_size) return 0;
|
||||||
|
gguf_remap(ctx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
@@ -159,6 +159,7 @@ typedef struct {
|
|||||||
/* =============================== Prototypes =============================== */
|
/* =============================== Prototypes =============================== */
|
||||||
|
|
||||||
gguf_ctx *gguf_init(const char *filename);
|
gguf_ctx *gguf_init(const char *filename);
|
||||||
|
gguf_ctx *gguf_create(const char *filename);
|
||||||
int gguf_remap(gguf_ctx *ctx);
|
int gguf_remap(gguf_ctx *ctx);
|
||||||
void gguf_rewind(gguf_ctx *ctx);
|
void gguf_rewind(gguf_ctx *ctx);
|
||||||
void gguf_end(gguf_ctx *ctx);
|
void gguf_end(gguf_ctx *ctx);
|
||||||
@@ -172,5 +173,9 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
|
|||||||
union gguf_value *val, uint64_t in_array,
|
union gguf_value *val, uint64_t in_array,
|
||||||
uint64_t array_len));
|
uint64_t array_len));
|
||||||
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
|
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
|
||||||
|
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
|
||||||
|
int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
|
||||||
|
int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size);
|
||||||
|
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user