From b7373cb44f2728983ffb99a3d21d61f73230a41e Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Wed, 9 Oct 2024 11:09:36 -0700
Subject: [PATCH] fix long prompt generations (#1023)

---
 llms/mlx_lm/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 8649fbe3..cfbcf29e 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -239,8 +239,8 @@ def generate_step(
         return y, logprobs.squeeze(0)
 
     while y.size > prefill_step_size:
-        model(y[:prefill_step_size][None], cache=cache)
-        mx.eval([c.state for c in cache])
+        model(y[:prefill_step_size][None], cache=prompt_cache)
+        mx.eval([c.state for c in prompt_cache])
         y = y[prefill_step_size:]
 
     y, logprobs = _step(y)
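
Note: the prefill loop in generate_step still referenced the old variable name `cache`
while the surrounding function used `prompt_cache`. Short prompts never enter the loop,
so only prompts longer than `prefill_step_size` tokens hit the stale name, which is why
the failure showed up only for long prompt generations.

For reference, a minimal sketch of the corrected chunked-prefill pattern. The standalone
`prefill` wrapper and the default step size of 512 are illustrative, not part of the
patch; only the loop body comes from the diff above.

    import mlx.core as mx

    def prefill(model, y, prompt_cache, prefill_step_size=512):
        """Feed a long prompt through the model in fixed-size chunks.

        Each chunk updates the key/value entries held in ``prompt_cache``;
        evaluating the cache state after every chunk keeps MLX's lazy
        compute graph from growing without bound on very long prompts.
        """
        while y.size > prefill_step_size:
            # Process one chunk; [None] adds the batch dimension the model expects.
            model(y[:prefill_step_size][None], cache=prompt_cache)
            # Force evaluation of the updated cache before taking the next chunk.
            mx.eval([c.state for c in prompt_cache])
            # Drop the tokens already absorbed into the cache.
            y = y[prefill_step_size:]
        # The remaining (<= prefill_step_size) tokens feed the first decode step.
        return y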