Update convert.py

This commit is contained in:
Anchen
2023-12-30 19:35:25 +11:00
committed by GitHub
parent 581a5733a1
commit c687a4e02e

View File

@@ -44,7 +44,7 @@ def convert(args):
     config = model.config.to_dict()
     state_dict = model.state_dict()
-    tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True, use_fast=False)
     # things to change
     # 1. there's no "model." in the weight names
@@ -84,7 +84,7 @@ def convert(args):
     weights = {k: v.numpy() for k, v in state_dict.items()}
-    config["rope_scaling_factor"] = config["rope_scaling"]["factor"]
+    config["rope_scaling_factor"] = config["rope_scaling"]["factor"] if config["rope_scaling"] is not None else 1.0
     keep_keys = set(
         [
             "vocab_size",
@@ -96,6 +96,7 @@ def convert(args):
             "rms_norm_eps",
             "intermediate_size",
             "rope_scaling_factor",
+            "rope_theta"
         ]
     )
     for k in list(config.keys()):
@@ -151,4 +152,4 @@ if __name__ == "__main__":
    tokenizer.save_pretrained(mlx_path)
    with open(mlx_path / "config.json", "w") as f:
        config["model_type"] = "deepseek_coder"
-        json.dump(config, f, indent=4)
\ No newline at end of file
+        json.dump(config, f, indent=4)