add thinking option

2025-08-28 00:30:09 +08:00 · 2025-02-08 08:50:43 -08:00 · 2025-02-08 08:50:43 -08:00 · b522102b80
commit b522102b80
parent cc9af4f19b
1 changed files with 15 additions and 2 deletions
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -93,6 +93,12 @@ def setup_arg_parser():
        action="store_true",
        help="Use the default chat template",
    )
+    parser.add_argument(
+        "--chat-template-config",
+        help="Additional config for `apply_chat_template`. Should be a dictionary of"
+        " string keys to values represented as a JSON decodable string.",
+        default=None,
+    )
    parser.add_argument(
        "--verbose",
        type=str2bool,
@@ -149,7 +155,6 @@ def setup_arg_parser():
 def main():
    parser = setup_arg_parser()
    args = parser.parse_args()
-
    mx.random.seed(args.seed)

    # Load the prompt cache and metadata if a cache file is provided
@@ -195,6 +200,10 @@ def main():
    for eos_token in args.extra_eos_token:
        tokenizer.add_eos_token(eos_token)

+    template_kwargs = {}
+    if args.chat_template_config is not None:
+        template_kwargs = json.loads(args.chat_template_config)
+
    if args.use_default_chat_template:
        if tokenizer.chat_template is None:
            tokenizer.chat_template = tokenizer.default_chat_template
@@ -209,8 +218,12 @@ def main():
        else:
            messages = []
        messages.append({"role": "user", "content": prompt})
+
        prompt = tokenizer.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
+            messages,
+            tokenize=False,
+            add_generation_prompt=True,
+            **template_kwargs,
        )

        # Treat the prompt as a suffix assuming that the prefix is in the