chore: make the Deepseek example compatible with Yi models. (#205)

* Update convert.py
* Update convert.py
* Update deepseek_coder.py
2025-08-16 15:47:19 +08:00 · 2023-12-31 01:11:33 +11:00 · 2023-12-31 01:11:33 +11:00 · ee3c44d231
commit ee3c44d231
parent 581a5733a1
2 changed files with 4 additions and 3 deletions
--- a/llms/deepseek-coder/convert.py
+++ b/llms/deepseek-coder/convert.py
@@ -44,7 +44,7 @@ def convert(args):
    config = model.config.to_dict()
    state_dict = model.state_dict()
-    tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True, use_fast=False)
    # things to change
    # 1. there's no "model." in the weight names
@@ -84,7 +84,7 @@ def convert(args):
    weights = {k: v.numpy() for k, v in state_dict.items()}
-    config["rope_scaling_factor"] = config["rope_scaling"]["factor"]
+    config["rope_scaling_factor"] = config["rope_scaling"]["factor"] if config["rope_scaling"] is not None else 1.0
    keep_keys = set(
        [
            "vocab_size",
@@ -96,6 +96,7 @@ def convert(args):
            "rms_norm_eps",
            "intermediate_size",
            "rope_scaling_factor",
+            "rope_theta"
        ]
    )
    for k in list(config.keys()):
--- a/llms/deepseek-coder/deepseek_coder.py
+++ b/llms/deepseek-coder/deepseek_coder.py
@@ -248,7 +248,7 @@ def load_model(model_path: str):
        nn.QuantizedLinear.quantize_module(model, **quantization)
    model.update(tree_unflatten(list(weights.items())))
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
    return model, tokenizer