mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-27 03:05:20 +08:00
chore: make the Deepseek example compatible with Yi models. (#205)
* Update convert.py
* Update convert.py
* Update deepseek_coder.py
This commit is contained in:
parent
581a5733a1
commit
ee3c44d231
@ -44,7 +44,7 @@ def convert(args):
|
|||||||
config = model.config.to_dict()
|
config = model.config.to_dict()
|
||||||
|
|
||||||
state_dict = model.state_dict()
|
state_dict = model.state_dict()
|
||||||
tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True)
|
tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True, use_fast=False)
|
||||||
|
|
||||||
# things to change
|
# things to change
|
||||||
# 1. there's no "model." in the weight names
|
# 1. there's no "model." in the weight names
|
||||||
@ -84,7 +84,7 @@ def convert(args):
|
|||||||
|
|
||||||
weights = {k: v.numpy() for k, v in state_dict.items()}
|
weights = {k: v.numpy() for k, v in state_dict.items()}
|
||||||
|
|
||||||
config["rope_scaling_factor"] = config["rope_scaling"]["factor"]
|
config["rope_scaling_factor"] = config["rope_scaling"]["factor"] if config["rope_scaling"] is not None else 1.0
|
||||||
keep_keys = set(
|
keep_keys = set(
|
||||||
[
|
[
|
||||||
"vocab_size",
|
"vocab_size",
|
||||||
@ -96,6 +96,7 @@ def convert(args):
|
|||||||
"rms_norm_eps",
|
"rms_norm_eps",
|
||||||
"intermediate_size",
|
"intermediate_size",
|
||||||
"rope_scaling_factor",
|
"rope_scaling_factor",
|
||||||
|
"rope_theta"
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
for k in list(config.keys()):
|
for k in list(config.keys()):
|
||||||
|
@ -248,7 +248,7 @@ def load_model(model_path: str):
|
|||||||
nn.QuantizedLinear.quantize_module(model, **quantization)
|
nn.QuantizedLinear.quantize_module(model, **quantization)
|
||||||
model.update(tree_unflatten(list(weights.items())))
|
model.update(tree_unflatten(list(weights.items())))
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user