From 82e333898707eb57235f408aa6907beca095f759 Mon Sep 17 00:00:00 2001
From: Anchen
Date: Mon, 4 Nov 2024 22:06:34 +0800
Subject: [PATCH] chore(mlx-lm): add max token arg for mlx_lm.chat (#1089)

* chore(mlx-lm): add max token arg for mlx_lm.chat
* chore: update the default max token value
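
For reference, a minimal sketch of the new flag's behavior (argparse only;
option names and the default are taken from this patch, the entry point is
assumed to be `python -m mlx_lm.chat`):

    import argparse

    # Mirrors the parser setup added in setup_arg_parser() below.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-tokens",
        "-m",
        type=int,
        default=256,  # DEFAULT_MAX_TOKENS introduced by this patch
        help="Maximum number of tokens to generate",
    )

    # argparse exposes "--max-tokens" as args.max_tokens, which main()
    # forwards to stream_generate.
    args = parser.parse_args(["-m", "512"])
    assert args.max_tokens == 512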
---
 llms/mlx_lm/chat.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llms/mlx_lm/chat.py b/llms/mlx_lm/chat.py
index ea1a99c7..85d32d5f 100644
--- a/llms/mlx_lm/chat.py
+++ b/llms/mlx_lm/chat.py
@@ -11,6 +11,7 @@ from .utils import load, stream_generate
 DEFAULT_TEMP = 0.0
 DEFAULT_TOP_P = 1.0
 DEFAULT_SEED = 0
+DEFAULT_MAX_TOKENS = 256
 DEFAULT_MODEL = "mlx-community/Llama-3.2-3B-Instruct-4bit"
 
 
@@ -41,6 +42,13 @@ def setup_arg_parser():
         help="Set the maximum key-value cache size",
         default=None,
     )
+    parser.add_argument(
+        "--max-tokens",
+        "-m",
+        type=int,
+        default=DEFAULT_MAX_TOKENS,
+        help="Maximum number of tokens to generate",
+    )
     return parser
 
 
@@ -70,6 +78,7 @@ def main():
             model,
             tokenizer,
             prompt,
+            args.max_tokens,
             temp=args.temp,
             top_p=args.top_p,
             prompt_cache=prompt_cache,
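
Note for reviewers: main() passes args.max_tokens positionally as the fourth
argument, so it must line up with the max_tokens parameter of stream_generate
(whose real signature lives in mlx_lm.utils). A hypothetical stand-in
illustrating just that call shape:

    # Hypothetical stand-in; only mirrors the call shape used in main().
    def stream_generate(model, tokenizer, prompt, max_tokens=256, **kwargs):
        yield f"would generate up to {max_tokens} tokens"

    for response in stream_generate(None, None, "hi", 512, temp=0.0):
        print(response)  # prints "would generate up to 512 tokens"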