mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 08:43:26 +08:00
nits
This commit is contained in:
parent
9f9da6af23
commit
109eb4e942
@ -224,21 +224,16 @@ def main():
|
|||||||
messages = []
|
messages = []
|
||||||
messages.append({"role": "user", "content": prompt})
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
if args.prefill_response is not None:
|
has_prefill = args.prefill_response is not None
|
||||||
|
if has_prefill:
|
||||||
messages.append({"role": "assistant", "content": args.prefill_response})
|
messages.append({"role": "assistant", "content": args.prefill_response})
|
||||||
prompt = tokenizer.apply_chat_template(
|
prompt = tokenizer.apply_chat_template(
|
||||||
messages,
|
messages,
|
||||||
tokenize=False,
|
tokenize=False,
|
||||||
continue_final_message=True,
|
continue_final_message=has_prefill,
|
||||||
**template_kwargs,
|
add_generation_prompt=not has_prefill,
|
||||||
)
|
**template_kwargs,
|
||||||
else:
|
)
|
||||||
prompt = tokenizer.apply_chat_template(
|
|
||||||
messages,
|
|
||||||
tokenize=False,
|
|
||||||
add_generation_prompt=True,
|
|
||||||
**template_kwargs,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Treat the prompt as a suffix assuming that the prefix is in the
|
# Treat the prompt as a suffix assuming that the prefix is in the
|
||||||
# stored kv cache.
|
# stored kv cache.
|
||||||
@ -247,7 +242,8 @@ def main():
|
|||||||
test_prompt = tokenizer.apply_chat_template(
|
test_prompt = tokenizer.apply_chat_template(
|
||||||
messages,
|
messages,
|
||||||
tokenize=False,
|
tokenize=False,
|
||||||
add_generation_prompt=True,
|
continue_final_message=has_prefill,
|
||||||
|
add_generation_prompt=not has_prefill,
|
||||||
)
|
)
|
||||||
prompt = prompt[test_prompt.index("<query>") :]
|
prompt = prompt[test_prompt.index("<query>") :]
|
||||||
prompt = tokenizer.encode(prompt, add_special_tokens=False)
|
prompt = tokenizer.encode(prompt, add_special_tokens=False)
|
||||||
|
Loading…
Reference in New Issue
Block a user