mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
Fix prompt cache for models without chat template (#1250)
* fix deepseek sharding (#1242)
* fix prompt cache with no chat template
This commit is contained in:
@@ -199,7 +199,7 @@ def main():
     if tokenizer.chat_template is None:
         tokenizer.chat_template = tokenizer.default_chat_template
     elif using_cache:
-        tokenizer.chat_template = metadata["chat_template"]
+        tokenizer.chat_template = json.loads(metadata["chat_template"])

     prompt = args.prompt.replace("\\n", "\n").replace("\\t", "\t")
     prompt = sys.stdin.read() if prompt == "-" else prompt
Reference in New Issue
Block a user