mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 12:49:50 +08:00
Fix prompt cache for models without chat template (#1250)
* fix deepseek sharding (#1242)
* fix prompt cache with no chat template
This commit is contained in:
@@ -152,7 +152,7 @@ def main():

     print("Saving...")
     metadata = {}
     metadata["model"] = args.model
-    metadata["chat_template"] = tokenizer.chat_template
+    metadata["chat_template"] = json.dumps(tokenizer.chat_template)
     metadata["tokenizer_config"] = json.dumps(tokenizer_config)
     save_prompt_cache(args.prompt_cache_file, cache, metadata)
Reference in New Issue
Block a user