fix encoding with special tokens + chat template (#1189)

Awni Hannun
2025-01-03 10:50:59 -08:00
committed by GitHub
parent 3a58c36109
commit c4833a2f55
13 changed files with 95 additions and 97 deletions
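The change drops `tokenize=False` so that `apply_chat_template` returns token ids directly instead of a string. Rendering the template to text and re-encoding it later can corrupt special tokens: `encode()` adds special tokens again by default, so the template's BOS marker can be duplicated or split. A minimal sketch of the difference, assuming a Hugging Face `transformers`-style tokenizer (the model name below is illustrative, not taken from this commit):

from transformers import AutoTokenizer

# Illustrative model; any chat model whose template inserts special tokens works.
tokenizer = AutoTokenizer.from_pretrained("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

messages = [{"role": "user", "content": "Hi my name is <Name>."}]

# Before: render the template to a string, then re-encode it later.
text = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
reencoded = tokenizer.encode(text)  # encode() adds special tokens again by default

# After: tokenize directly (the default), keeping special tokens intact.
tokens = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

print(reencoded[:4])  # may start with a duplicated BOS id
print(tokens[:4])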


@@ -15,9 +15,7 @@ prompt_cache = make_prompt_cache(model)
 # User turn
 prompt = "Hi my name is <Name>."
 messages = [{"role": "user", "content": prompt}]
-prompt = tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
-)
+prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 
 # Assistant response
 response = generate(
@@ -32,9 +30,7 @@ response = generate(
 # User turn
 prompt = "What's my name?"
 messages = [{"role": "user", "content": prompt}]
-prompt = tokenizer.apply_chat_template(
-    messages, tokenize=False, add_generation_prompt=True
-)
+prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 
 # Assistant response
 response = generate(


@@ -14,7 +14,7 @@ conversation = [{"role": "user", "content": prompt}]
 # Transform the prompt into the chat template
 prompt = tokenizer.apply_chat_template(
-    conversation=conversation, tokenize=False, add_generation_prompt=True
+    conversation=conversation, add_generation_prompt=True
 )
 
 # Specify the maximum number of tokens
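After this change, the `prompt` handed to `generate` is a list of token ids rather than a string; the updated examples rely on `generate` accepting pre-tokenized prompts, so no second encoding pass touches the special tokens.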