From ef1483420aa154a4265b401fa1df622691a695e0 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Wed, 5 Feb 2025 06:32:35 -0800
Subject: [PATCH] fix prompt cache with no chat template

---
 llms/mlx_lm/cache_prompt.py | 2 +-
 llms/mlx_lm/generate.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/cache_prompt.py b/llms/mlx_lm/cache_prompt.py
index c18f1bae..fff64f78 100644
--- a/llms/mlx_lm/cache_prompt.py
+++ b/llms/mlx_lm/cache_prompt.py
@@ -152,7 +152,7 @@ def main():
     print("Saving...")
     metadata = {}
     metadata["model"] = args.model
-    metadata["chat_template"] = tokenizer.chat_template
+    metadata["chat_template"] = json.dumps(tokenizer.chat_template)
     metadata["tokenizer_config"] = json.dumps(tokenizer_config)
     save_prompt_cache(args.prompt_cache_file, cache, metadata)

diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 0d286c75..e7994750 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -199,7 +199,7 @@ def main():
     if tokenizer.chat_template is None:
         tokenizer.chat_template = tokenizer.default_chat_template
     elif using_cache:
-        tokenizer.chat_template = metadata["chat_template"]
+        tokenizer.chat_template = json.loads(metadata["chat_template"])
 
     prompt = args.prompt.replace("\\n", "\n").replace("\\t", "\t")
     prompt = sys.stdin.read() if prompt == "-" else prompt
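
Note on the fix: the prompt-cache metadata is, as far as I understand it, a flat dict of string values, so a tokenizer.chat_template of None (the case for a model shipped without a chat template) cannot be stored in it directly. Serializing with json.dumps on save and json.loads on load round-trips both a template string and None. Below is a minimal standalone sketch of that round trip; the variable names are illustrative and not part of the patch:

    import json

    # A tokenizer with no chat template exposes None here.
    chat_template = None

    # json.dumps always produces a string ("null" for None), which is safe
    # to place in string-valued metadata when the cache is saved.
    serialized = json.dumps(chat_template)

    # json.loads restores the original value when the cache is reloaded,
    # so the "no chat template" case survives the round trip.
    restored = json.loads(serialized)

    assert restored == chat_template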