fix encoding with special tokens + chat template (#1189)

2025-12-16 02:08:55 +08:00 · 2025-01-03 10:50:59 -08:00
parent 3a58c36109
commit c4833a2f55
13 changed files with 95 additions and 97 deletions
--- a/llms/README.md
+++ b/llms/README.md
@@ -58,7 +58,7 @@ prompt = "Write a story about Einstein"

 messages = [{"role": "user", "content": prompt}]
 prompt = tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
+    messages, add_generation_prompt=True
 )

 text = generate(model, tokenizer, prompt=prompt, verbose=True)
@@ -115,7 +115,7 @@ prompt = "Write a story about Einstein"

 messages = [{"role": "user", "content": prompt}]
 prompt = tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
+    messages, add_generation_prompt=True
 )

 for response in stream_generate(model, tokenizer, prompt, max_tokens=512):