Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-08-29 06:54:39 +08:00)

Commit b522102b80: add thinking option
Parent commit: cc9af4f19b
@@ -93,6 +93,12 @@ def setup_arg_parser():
         action="store_true",
         help="Use the default chat template",
     )
+    parser.add_argument(
+        "--chat-template-config",
+        help="Additional config for `apply_chat_template`. Should be a dictionary of"
+        " string keys to values represented as a JSON decodable string.",
+        default=None,
+    )
     parser.add_argument(
         "--verbose",
         type=str2bool,
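The new --chat-template-config flag takes a JSON string whose keys end up as keyword arguments for apply_chat_template (see the later hunks). A hedged usage sketch, assuming the usual mlx_lm.generate entry point and an enable_thinking key suggested by the commit title rather than defined in this diff:

import json

# Hypothetical flag value, e.g. passed on the command line as
#   --chat-template-config '{"enable_thinking": false}'
# The key name is an assumption; any JSON object with string keys works.
raw = '{"enable_thinking": false}'

template_kwargs = json.loads(raw)
print(template_kwargs)  # -> {'enable_thinking': False}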
@@ -149,7 +155,6 @@ def setup_arg_parser():
 def main():
     parser = setup_arg_parser()
     args = parser.parse_args()
-
     mx.random.seed(args.seed)
 
     # Load the prompt cache and metadata if a cache file is provided
@@ -195,6 +200,10 @@ def main():
     for eos_token in args.extra_eos_token:
         tokenizer.add_eos_token(eos_token)
 
+    template_kwargs = {}
+    if args.chat_template_config is not None:
+        template_kwargs = json.loads(args.chat_template_config)
+
     if args.use_default_chat_template:
         if tokenizer.chat_template is None:
             tokenizer.chat_template = tokenizer.default_chat_template
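Taken on its own, the new block leaves template_kwargs empty when the flag is absent and otherwise JSON-decodes the string into a plain dict. A minimal standalone sketch of that behavior, using a hypothetical parse_template_config helper name that does not appear in the diff:

import json
from typing import Any, Dict, Optional

def parse_template_config(raw: Optional[str]) -> Dict[str, Any]:
    # Mirrors the diff: no flag -> empty kwargs, otherwise decode the JSON string.
    if raw is None:
        return {}
    return json.loads(raw)

print(parse_template_config(None))                          # {}
print(parse_template_config('{"enable_thinking": false}'))  # {'enable_thinking': False}

As in the diff, a malformed JSON string raises json.JSONDecodeError rather than being silently ignored.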
@@ -209,8 +218,12 @@ def main():
         else:
             messages = []
         messages.append({"role": "user", "content": prompt})
+
         prompt = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            **template_kwargs,
         )
 
         # Treat the prompt as a suffix assuming that the prefix is in the
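Expanding the call site lets **template_kwargs forward the decoded options to the chat template: Hugging Face's apply_chat_template passes extra keyword arguments through to the template renderer, where they appear as template variables. A toy illustration of that flow using jinja2 directly (the template and the enable_thinking variable are stand-ins, not the real model template):

from jinja2 import Template

# Toy stand-in for a chat template that honors an "enable_thinking" switch;
# real templates are far more involved and the variable name is an assumption.
toy_template = Template(
    "{% for m in messages %}<|{{ m.role }}|>{{ m.content }}\n{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>"
    "{% if not enable_thinking %}<think></think>{% endif %}"
    "{% endif %}"
)

messages = [{"role": "user", "content": "hello"}]
template_kwargs = {"enable_thinking": False}

# apply_chat_template-style call: extra kwargs become template variables.
print(toy_template.render(messages=messages, add_generation_prompt=True, **template_kwargs))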