FP16 added. Split-mixtral improved.

This commit is contained in:
antirez
2023-12-27 15:13:42 +01:00
parent a77a4d061c
commit bd4ecbda94
6 changed files with 144 additions and 8 deletions

View File

@@ -169,8 +169,9 @@ void gguf_tools_show(const char *filename) {
}
/* Read a Mixtral MoE model and create a new non-MoE GGUF file based
* on the weights of the expert with id 'expert_id'. */
void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const char *output_filename) {
* on the weights of the experts with IDs in the array of 'experts_id'.
* The array must contain 32 integers, one for each layer. */
void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) {
gguf_ctx *mixtral = gguf_init(mixtral_filename);
if (mixtral == NULL) {
perror("Opening Mixtral file");
@@ -238,6 +239,20 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
/* The tensor is a feed-forward tensor? We want to copy only
* the ones of our expert ID. */
if (strstr(tn,".ffn_") != NULL && strstr(tn,".ffn_norm") == NULL) {
/* Extract which block this FFN belongs to. */
int block;
assert(memcmp(tn,"blk.",4) == 0); // Must start with blk.<block>
char *p = strchr(tn+4,'.');
assert(p != NULL);
*p = 0;
block = atoi(tn+4);
*p = '.';
assert(block >= 0 && block < 32);
/* Now that we have the block, we can select the corresponding
* expert ID we want to use for this block. */
int expert_id = experts_id[block];
char match[32];
snprintf(match,sizeof(match),".%d.weight",expert_id);
char *match_ptr = strstr(tn,match);
@@ -283,7 +298,8 @@ void gguf_tools_split_mixtral(int expert_id, const char *mixtral_filename, const
/* Finally, append the tensors weights. */
for (uint32_t j = 0; j < num_tensors; j++) {
printf("Writing tensor %s\n", tensors[j].dest_name);
printf("Writing tensor %s (weights from %.*s)\n", tensors[j].dest_name,
(int)tensors[j].orig_info.namelen, tensors[j].orig_info.name);
if (gguf_append_tensor_data(output,tensors[j].orig_info.weights_data,
tensors[j].orig_info.bsize) == 0)
{
@@ -300,7 +316,9 @@ void gguf_tools_usage(const char *progname) {
printf("Usage: %s <subcommand> [options...]\n"
"Subcommands:\n"
" show <filename> -- show GGUF model keys and tensors.\n"
" split-mixtral <id> mixtral.gguf out.gguf -- extract expert.\n"
" split-mixtral <ids...> mixtral.gguf out.gguf -- extract expert.\n"
"Example:\n"
" split-mixtral 65230776370407150546470161412165 mixtral.gguf out.gguf\n"
, progname);
exit(1);
}
@@ -311,7 +329,22 @@ int main(int argc, char **argv) {
if (!strcmp(argv[1],"show") && argc == 3) {
gguf_tools_show(argv[2]);
} else if (!strcmp(argv[1],"split-mixtral") && argc == 5) {
gguf_tools_split_mixtral(atoi(argv[2]),argv[3],argv[4]);
int experts[32];
size_t elen = strlen(argv[2]);
for (size_t j = 0; j < 32; j++) {
if (j < elen) {
experts[j] = argv[2][j] - '0';
if (experts[j] < 0 || experts[j] > 7) {
fprintf(stderr,"Invalid expert ID: %d\n", experts[j]);
exit(1);
}
} else {
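/* If there aren't 32 digits in the input, use the last
* one repeated up to the last layer.
* NOTE(review): with the 'j > 1' test below, a single-digit input
* leaves experts[1] (and so every later entry) set to 0 instead of
* repeating experts[0]; 'j > 0' looks like what was intended. */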
experts[j] = j > 1 ? experts[j-1] : 0;
}
}
gguf_tools_split_mixtral(experts,argv[3],argv[4]);
} else {
gguf_tools_usage(argv[0]);
}