From 42672f5446d08cd7a427e278290a56e906c6bd35 Mon Sep 17 00:00:00 2001
From: Baptiste Canton
Date: Tue, 23 Jan 2024 04:52:42 +0100
Subject: [PATCH] add an option to apply the tokenizer chat template (#338)

* add an option to apply the tokenizer chat template

* fix the option to apply the tokenizer chat template

* better error messages for chat template issues

* apply the chat template by default when possible

* nit in comment'

* rebase

---------

Co-authored-by: Awni Hannun
---
 llms/mlx_lm/generate.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 530a3483..31c06eb4 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -46,6 +46,11 @@ def setup_arg_parser():
         "--temp", type=float, default=DEFAULT_TEMP, help="Sampling temperature"
     )
     parser.add_argument("--seed", type=int, default=DEFAULT_SEED, help="PRNG seed")
+    parser.add_argument(
+        "--ignore-chat-template",
+        action="store_true",
+        help="Use the raw prompt without the tokenizer's chat template.",
+    )
     return parser
 
 
@@ -58,9 +63,21 @@ def main(args):
         tokenizer_config["eos_token"] = args.eos_token
 
     model, tokenizer = load(args.model, tokenizer_config=tokenizer_config)
+
+    if not args.ignore_chat_template and (
+        hasattr(tokenizer, "apply_chat_template")
+        and tokenizer.chat_template is not None
+    ):
+        messages = [{"role": "user", "content": args.prompt}]
+        prompt = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+    else:
+        prompt = args.prompt
+
     print("=" * 10)
-    print("Prompt:", args.prompt)
-    prompt = tokenizer.encode(args.prompt)
+    print("Prompt:", prompt)
+    prompt = tokenizer.encode(prompt)
     prompt = mx.array(prompt)
     tic = time.time()
     tokens = []
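
For reference, a minimal sketch of what the new default path does, using the Hugging Face
tokenizer API directly rather than the mlx_lm loader; the model name is only an example and
is not part of the patch.

    # Illustrative sketch, not part of the patch: how the prompt is built when a
    # chat template is available, and what --ignore-chat-template falls back to.
    from transformers import AutoTokenizer

    # Example model; any tokenizer that ships a chat template behaves the same way.
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

    messages = [{"role": "user", "content": "hello"}]
    if tokenizer.chat_template is not None:
        # Default: wrap the prompt in the model's chat markup,
        # e.g. "<s>[INST] hello [/INST]" for Mistral-Instruct.
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    else:
        # No template (or --ignore-chat-template): use the raw prompt as-is.
        prompt = "hello"
    print(prompt)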