From c9994f80e6729b2f6cff7eb010d26c1bf63229fc Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Tue, 5 Nov 2024 17:18:00 -0800
Subject: [PATCH] fix stream generate

---
 llms/mlx_lm/utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 853e1c95..240e5dd9 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -300,10 +300,9 @@ def stream_generate(
         range(max_tokens),
         generate_step(prompt_tokens, model, **kwargs),
     ):
-        if token == tokenizer.eos_token_id:
-            break
         detokenizer.add_token(token)
-
+        if n == (max_tokens - 1) or token == tokenizer.eos_token_id:
+            break
         # Yield the last segment if streaming
         yield detokenizer.last_segment, token, logits
 