From ef1483420aa154a4265b401fa1df622691a695e0 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Wed, 5 Feb 2025 06:32:35 -0800
Subject: [PATCH] fix prompt cache with no chat template

---
 llms/mlx_lm/cache_prompt.py | 2 +-
 llms/mlx_lm/generate.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/cache_prompt.py b/llms/mlx_lm/cache_prompt.py
index c18f1bae..fff64f78 100644
--- a/llms/mlx_lm/cache_prompt.py
+++ b/llms/mlx_lm/cache_prompt.py
@@ -152,7 +152,7 @@ def main():
     print("Saving...")
     metadata = {}
     metadata["model"] = args.model
-    metadata["chat_template"] = tokenizer.chat_template
+    metadata["chat_template"] = json.dumps(tokenizer.chat_template)
     metadata["tokenizer_config"] = json.dumps(tokenizer_config)
     save_prompt_cache(args.prompt_cache_file, cache, metadata)

diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 0d286c75..e7994750 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -199,7 +199,7 @@ def main():
     if tokenizer.chat_template is None:
         tokenizer.chat_template = tokenizer.default_chat_template
     elif using_cache:
-        tokenizer.chat_template = metadata["chat_template"]
+        tokenizer.chat_template = json.loads(metadata["chat_template"])
 
     prompt = args.prompt.replace("\\n", "\n").replace("\\t", "\t")
     prompt = sys.stdin.read() if prompt == "-" else prompt
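
Note on the fix: the prompt-cache metadata is, as far as I understand it, a flat dict of string values, so a tokenizer.chat_template of None (the case for a model shipped without a chat template) cannot be stored in it directly. Serializing with json.dumps on save and json.loads on load round-trips both a template string and None. Below is a minimal standalone sketch of that round trip; the variable names are illustrative and not part of the patch:

    import json

    # A tokenizer with no chat template exposes None here.
    chat_template = None

    # json.dumps always produces a string ("null" for None), which is safe
    # to place in string-valued metadata when the cache is saved.
    serialized = json.dumps(chat_template)

    # json.loads restores the original value when the cache is reloaded,
    # so the "no chat template" case survives the round trip.
    restored = json.loads(serialized)

    assert restored == chat_template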