mirror of
https://github.com/antirez/gguf-tools.git
synced 2025-09-17 02:28:07 +08:00
Mixtral experts extraction test.
This commit is contained in:
90
gguf-tools.c
90
gguf-tools.c
@@ -2,7 +2,10 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#include "gguflib.h"
|
#include "gguflib.h"
|
||||||
|
#include "sds.h"
|
||||||
|
|
||||||
/* ========================== Utility functions ============================ */
|
/* ========================== Utility functions ============================ */
|
||||||
|
|
||||||
@@ -201,6 +204,93 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
|
|||||||
if (!skip)
|
if (!skip)
|
||||||
gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
|
gguf_append_kv(output,key.name,key.namelen,key.type,value,value_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now it's time to copy the tensors. We need to copy all the shared
|
||||||
|
* tensors (between the different experts), but only a set of
|
||||||
|
* expert-specific tensors corresponding to the expert ID the user
|
||||||
|
* wants to extract. */
|
||||||
|
struct tensor_to_copy {
|
||||||
|
sds dest_name; // Tensor name in the output file.
|
||||||
|
gguf_tensor orig_info; // Original tensor info.
|
||||||
|
uint64_t dest_offset; // Destination offset in output file.
|
||||||
|
uint64_t size; // Tensor total bytes.
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t num_tensors = 0;
|
||||||
|
uint32_t max_tensors = 2048;
|
||||||
|
|
||||||
|
struct tensor_to_copy *tensors =
|
||||||
|
malloc(sizeof(struct tensor_to_copy)*max_tensors);
|
||||||
|
if (tensors == NULL) {
|
||||||
|
perror("Allocating tensors info array");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scan Mixtral tensors looking for the ones we need to copy
|
||||||
|
* in the output model. */
|
||||||
|
gguf_tensor tensor_info;
|
||||||
|
while (gguf_get_tensor(mixtral,&tensor_info)) {
|
||||||
|
assert(num_tensors < max_tensors);
|
||||||
|
|
||||||
|
char tn[1024]; // Tensor name as null terminated string.
|
||||||
|
snprintf(tn,sizeof(tn),"%.*s",(int)tensor_info.namelen, tensor_info.name);
|
||||||
|
|
||||||
|
/* The tensor is a feed-forward tensor? We want to copy only
|
||||||
|
* the ones of our expert ID. */
|
||||||
|
if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
|
||||||
|
char match[32];
|
||||||
|
snprintf(match,sizeof(match),".%d.weight",expert_id);
|
||||||
|
char *match_ptr = strstr(tn,match);
|
||||||
|
if (match_ptr == NULL) {
|
||||||
|
printf("Skipping tensor %s\n", tn);
|
||||||
|
continue; // Skip this tensor.
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We need to remove the .<id>. from the name. */
|
||||||
|
size_t taillen = strlen(match_ptr);
|
||||||
|
memmove(match_ptr,match_ptr+2,taillen+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create the entry for this tensor. Later we will scan all our
|
||||||
|
* entries and append data to our output tensor. */
|
||||||
|
tensors[num_tensors].dest_name = sdsnew(tn);
|
||||||
|
if (tensors[num_tensors].dest_name == NULL) {
|
||||||
|
perror("Allocating test tensor name");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
tensors[num_tensors].orig_info = tensor_info;
|
||||||
|
tensors[num_tensors].size = tensor_info.bsize;
|
||||||
|
num_tensors++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now we need to set the offset for our destination tensors. As
|
||||||
|
* we calculate the offsets, we can emit the tensors information
|
||||||
|
* section as well. */
|
||||||
|
uint64_t tensor_off = 0; // Tensor offsets are relative to data section,
|
||||||
|
// so we start at offset 0.
|
||||||
|
for (uint32_t j = 0; j < num_tensors; j++) {
|
||||||
|
/* Align offset. */
|
||||||
|
tensor_off += gguf_get_alignment_padding(mixtral->alignment,tensor_off);
|
||||||
|
tensors[j].dest_offset = tensor_off;
|
||||||
|
if (gguf_append_tensor_info(output,tensors[j].dest_name,strlen(tensors[j].dest_name),tensors[j].orig_info.ndim,tensors[j].orig_info.dim,tensors[j].orig_info.type,tensor_off) == 0)
|
||||||
|
{
|
||||||
|
perror("Failed to append tensor info");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
tensor_off += tensors[j].orig_info.bsize;
|
||||||
|
}
|
||||||
|
printf("Output file: after writing tensors info, file size is: %llu\n", output->size);
|
||||||
|
|
||||||
|
/* Finally, append the tensors weights. */
|
||||||
|
for (uint32_t j = 0; j < num_tensors; j++) {
|
||||||
|
printf("Writing tensor %s\n", tensors[j].dest_name);
|
||||||
|
if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
|
||||||
|
tensors[j].orig_info.bsize) == 0)
|
||||||
|
{
|
||||||
|
perror("Failed to append tensor data");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
30
gguflib.c
30
gguflib.c
@@ -451,7 +451,7 @@ int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t
|
|||||||
|
|
||||||
/* Append tensor metadata (but not the actual tensor weights data) to the
|
/* Append tensor metadata (but not the actual tensor weights data) to the
|
||||||
* GGUF file identified by 'ctx'. */
|
* GGUF file identified by 'ctx'. */
|
||||||
int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset)
|
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset)
|
||||||
{
|
{
|
||||||
if (write(ctx->fd,&namelen,sizeof(namelen)) != sizeof(namelen)) return 0;
|
if (write(ctx->fd,&namelen,sizeof(namelen)) != sizeof(namelen)) return 0;
|
||||||
if (write(ctx->fd,tensorname,namelen) != (ssize_t)namelen) return 0;
|
if (write(ctx->fd,tensorname,namelen) != (ssize_t)namelen) return 0;
|
||||||
@@ -467,32 +467,14 @@ int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Append tensor data enforcing the GGUF file alignment. The user must specify
|
/* Append tensor data enforcing the GGUF file alignment.
|
||||||
* an offset that requires no more than ctx.alignment-1 padding bytes starting
|
* The function will take care to add the padding required to start writing
|
||||||
* from the current offset (this means that this function should be called
|
* the tensor at an alignment multiple. */
|
||||||
* sequentially for all the tensors we want to store, after we already
|
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) {
|
||||||
* computed the right offset for all the tensors). Also the offset must be
|
|
||||||
* aligned. Otherwise the function will fail returning 0. On success, 1 is
|
|
||||||
* returned. The function will take care to add the padding required to
|
|
||||||
* start writing the tensor at the specified offset. */
|
|
||||||
int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size) {
|
|
||||||
char padding_data[1024] = {0};
|
char padding_data[1024] = {0};
|
||||||
assert(sizeof(padding_data) >= ctx->alignment);
|
assert(sizeof(padding_data) >= ctx->alignment);
|
||||||
|
|
||||||
/* Is the offset aligned? */
|
uint64_t padding = gguf_get_alignment_padding(ctx->alignment,ctx->size);
|
||||||
if (offset % ctx->alignment) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We expect the offset of the context to be already where this tensor
|
|
||||||
* should be stored, minus the padding. */
|
|
||||||
if (offset < ctx->off || offset - ctx->off >= ctx->alignment) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t padding = gguf_get_alignment_padding(ctx->alignment,offset);
|
|
||||||
if (write(ctx->fd,padding_data,padding) != (ssize_t)padding) return 0;
|
if (write(ctx->fd,padding_data,padding) != (ssize_t)padding) return 0;
|
||||||
if (write(ctx->fd,tensor,tensor_size) != (ssize_t)tensor_size) return 0;
|
if (write(ctx->fd,tensor,tensor_size) != (ssize_t)tensor_size) return 0;
|
||||||
gguf_remap(ctx);
|
gguf_remap(ctx);
|
||||||
|
@@ -174,8 +174,8 @@ void gguf_do_with_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val,
|
|||||||
uint64_t array_len));
|
uint64_t array_len));
|
||||||
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
|
void gguf_print_value(gguf_ctx *ctx, uint32_t type, union gguf_value *val, int full);
|
||||||
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
|
int gguf_append_kv(gguf_ctx *ctx, const char *keyname, uint64_t keylen, uint32_t type, void *val, uint64_t len);
|
||||||
int gguf_append_tensor(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
|
int gguf_append_tensor_info(gguf_ctx *ctx, const char *tensorname, uint64_t namelen, uint32_t num_dim, uint64_t *dim, uint32_t type, uint64_t offset);
|
||||||
int gguf_append_tensor_data(gguf_ctx *ctx, uint64_t offset, void *tensor, uint64_t tensor_size);
|
int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size);
|
||||||
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
|
uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user