From ee3c44d231a48de7aa31095bb8a92d5a0d5f5373 Mon Sep 17 00:00:00 2001 From: Anchen Date: Sun, 31 Dec 2023 01:11:33 +1100 Subject: [PATCH] chore: make the Deepseek example compatible with Yi models. (#205) * Update convert.py * Update convert.py * Update deepseek_coder.py --- llms/deepseek-coder/convert.py | 5 +++-- llms/deepseek-coder/deepseek_coder.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llms/deepseek-coder/convert.py b/llms/deepseek-coder/convert.py index 06c681c4..9ffa52c3 100644 --- a/llms/deepseek-coder/convert.py +++ b/llms/deepseek-coder/convert.py @@ -44,7 +44,7 @@ def convert(args): config = model.config.to_dict() state_dict = model.state_dict() - tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(str(hf_path), trust_remote_code=True, use_fast=False) # things to change # 1. there's no "model." in the weight names @@ -84,7 +84,7 @@ def convert(args): weights = {k: v.numpy() for k, v in state_dict.items()} - config["rope_scaling_factor"] = config["rope_scaling"]["factor"] + config["rope_scaling_factor"] = config["rope_scaling"]["factor"] if config["rope_scaling"] is not None else 1.0 keep_keys = set( [ "vocab_size", @@ -96,6 +96,7 @@ def convert(args): "rms_norm_eps", "intermediate_size", "rope_scaling_factor", + "rope_theta" ] ) for k in list(config.keys()): diff --git a/llms/deepseek-coder/deepseek_coder.py b/llms/deepseek-coder/deepseek_coder.py index 9b8a8a3e..0b93e8ee 100644 --- a/llms/deepseek-coder/deepseek_coder.py +++ b/llms/deepseek-coder/deepseek_coder.py @@ -248,7 +248,7 @@ def load_model(model_path: str): nn.QuantizedLinear.quantize_module(model, **quantization) model.update(tree_unflatten(list(weights.items()))) - tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False) return model, tokenizer