mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 18:26:37 +08:00
Generate: Support Prefill Prompt
python -m mlx_lm.generate \
    --model mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-4bit \
    --prompt "hello" \
    --prefill-prompt "<think>\n"
This commit is contained in:
parent
09b641aaa7
commit
431ece6c5b
@@ -60,6 +60,11 @@ def setup_arg_parser():
|
|||||||
default=DEFAULT_PROMPT,
|
default=DEFAULT_PROMPT,
|
||||||
help="Message to be processed by the model ('-' reads from stdin)",
|
help="Message to be processed by the model ('-' reads from stdin)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--prefill-prompt",
|
||||||
|
default=None,
|
||||||
|
help="Prefill prompt to be used for the chat template",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--max-tokens",
|
"--max-tokens",
|
||||||
"-m",
|
"-m",
|
||||||
@@ -219,6 +224,15 @@ def main():
|
|||||||
messages = []
|
messages = []
|
||||||
messages.append({"role": "user", "content": prompt})
|
messages.append({"role": "user", "content": prompt})
|
||||||
|
|
||||||
|
if args.prefill_prompt is not None:
|
||||||
|
messages.append({"role": "assistant", "content": args.prefill_prompt})
|
||||||
|
prompt = tokenizer.apply_chat_template(
|
||||||
|
messages,
|
||||||
|
tokenize=False,
|
||||||
|
continue_final_message=True,
|
||||||
|
**template_kwargs,
|
||||||
|
)
|
||||||
|
else:
|
||||||
prompt = tokenizer.apply_chat_template(
|
prompt = tokenizer.apply_chat_template(
|
||||||
messages,
|
messages,
|
||||||
tokenize=False,
|
tokenize=False,
|
||||||
|
Loading…
Reference in New Issue
Block a user