Use config.json, add model_type (#157)

* Use config.json, add model_type

* Update convert to generate config.json
Pedro Cuenca 2023-12-20 17:39:37 +01:00 committed by GitHub
parent 4b7e11bd31
commit 730c50d00a
2 changed files with 11 additions and 2 deletions

View File

@@ -1,6 +1,7 @@
 # Copyright © 2023 Apple Inc.

 import argparse
+import json
 import numpy as np
 from pathlib import Path
 import torch
@@ -22,3 +23,10 @@ if __name__ == "__main__":
         str(model_path / "weights.npz"),
         **{k: v.to(torch.float16).numpy() for k, v in state.items()}
     )
+
+    # Save config.json with model_type
+    with open(model_path / "params.json", "r") as f:
+        config = json.loads(f.read())
+        config["model_type"] = "mistral"
+    with open(model_path / "config.json", "w") as f:
+        json.dump(config, f, indent=4)
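With this change, conversion copies the model's params.json and writes it back out as config.json with an added model_type field. As a rough illustration only (the key names and values below are assumptions modeled on a typical Mistral-7B params.json, not taken from this commit), the generated file would look something like the output of this sketch:

# Illustrative sketch: keys/values are assumed, plus the model_type field this commit adds.
import json

example_config = {
    "dim": 4096,
    "n_layers": 32,
    "head_dim": 128,
    "hidden_dim": 14336,
    "n_heads": 32,
    "n_kv_heads": 8,
    "norm_eps": 1e-05,
    "sliding_window": 4096,
    "vocab_size": 32000,
    "model_type": "mistral",  # new field written by the updated convert script
}
print(json.dumps(example_config, indent=4))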

View File

@@ -192,9 +192,10 @@ class Tokenizer:
 def load_model(folder: str, dtype=mx.float16):
     model_path = Path(folder)
     tokenizer = Tokenizer(str(model_path / "tokenizer.model"))
-    with open(model_path / "params.json", "r") as f:
+    with open(model_path / "config.json", "r") as f:
         config = json.loads(f.read())
-        config.pop("sliding_window")
+        config.pop("sliding_window", None)
+        config.pop("model_type", None)
     model_args = ModelArgs(**config)
     weights = mx.load(str(model_path / "weights.npz"))
     weights = tree_unflatten(list(weights.items()))
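For context, here is a minimal sketch of why the loader pops those keys before building ModelArgs (the dataclass fields and the path below are assumptions for illustration, not copied from the repo): any key left in the dict that the dataclass does not declare would make ModelArgs(**config) fail with an unexpected-keyword-argument error, so sliding_window (carried over from params.json) and the new model_type are removed first, with a None default so folders converted before this change still load.

# Minimal sketch under assumed ModelArgs fields; "mistral-7B-v0.1/config.json" is a hypothetical path.
import json
from dataclasses import dataclass

@dataclass
class ModelArgs:
    dim: int
    n_layers: int
    head_dim: int
    hidden_dim: int
    n_heads: int
    n_kv_heads: int
    norm_eps: float
    vocab_size: int

with open("mistral-7B-v0.1/config.json", "r") as f:
    config = json.load(f)

# pop with a default of None so the call also succeeds when the key is absent
config.pop("sliding_window", None)  # carried over from params.json
config.pop("model_type", None)      # written by the updated convert script
model_args = ModelArgs(**config)    # would raise TypeError if extra keys remained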