nits

2025-08-29 08:43:26 +08:00 · 2025-02-27 07:39:15 -08:00 · 2025-02-27 07:39:15 -08:00 · 109eb4e942
commit 109eb4e942
parent 9f9da6af23
1 changed files with 11 additions and 15 deletions
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -224,21 +224,16 @@ def main():
            messages = []
        messages.append({"role": "user", "content": prompt})
-        if args.prefill_response is not None:
+        has_prefill = args.prefill_response is not None
+        if has_prefill:
            messages.append({"role": "assistant", "content": args.prefill_response})
-            prompt = tokenizer.apply_chat_template(
+        prompt = tokenizer.apply_chat_template(
-                messages,
+            messages,
-                tokenize=False,
+            tokenize=False,
-                continue_final_message=True,
+            continue_final_message=has_prefill,
-                **template_kwargs,
+            add_generation_prompt=not has_prefill,
-            )
+            **template_kwargs,
-        else:
+        )
-            prompt = tokenizer.apply_chat_template(
-                messages,
-                tokenize=False,
-                add_generation_prompt=True,
-                **template_kwargs,
-            )
        # Treat the prompt as a suffix assuming that the prefix is in the
        # stored kv cache.
@@ -247,7 +242,8 @@ def main():
            test_prompt = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
-                add_generation_prompt=True,
+                continue_final_message=has_prefill,
+                add_generation_prompt=not has_prefill,
            )
            prompt = prompt[test_prompt.index("<query>") :]
        prompt = tokenizer.encode(prompt, add_special_tokens=False)