fix long prompt generations (#1023)

Author: Awni Hannun (committed by GitHub)
Date: 2024-10-09 11:09:36 -07:00
Parent: fca087be49
Commit: b7373cb44f


@@ -239,8 +239,8 @@ def generate_step(
         return y, logprobs.squeeze(0)
 
     while y.size > prefill_step_size:
-        model(y[:prefill_step_size][None], cache=cache)
-        mx.eval([c.state for c in cache])
+        model(y[:prefill_step_size][None], cache=prompt_cache)
+        mx.eval([c.state for c in prompt_cache])
         y = y[prefill_step_size:]
 
     y, logprobs = _step(y)
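
The change repoints the chunked prefill loop in generate_step at the prompt_cache argument; the loop still referenced the stale cache name, which broke generations whose prompt exceeded prefill_step_size. The sketch below illustrates that chunked-prefill pattern with a stand-in model and cache so the control flow is visible in isolation; DummyCache, dummy_model, and the constants are hypothetical placeholders, not the mlx-lm implementation.

# Minimal sketch of chunked prefill with a prompt cache (illustrative only).
import mlx.core as mx


class DummyCache:
    """Stand-in for a per-layer KV cache whose `state` the loop evaluates."""

    def __init__(self):
        self.state = mx.zeros((1,))

    def update(self, chunk):
        # Pretend to extend the cached keys/values with the new chunk.
        self.state = mx.concatenate([self.state, chunk.astype(mx.float32).flatten()])


def dummy_model(tokens, cache):
    # A real model would run a forward pass and write keys/values into `cache`;
    # here we only record the chunk so the loop structure is observable.
    for c in cache:
        c.update(tokens)
    return tokens


prompt_cache = [DummyCache() for _ in range(2)]  # one entry per layer
y = mx.arange(10)  # fake prompt tokens
prefill_step_size = 4

# Feed the prompt to the model in fixed-size chunks, evaluating the cache
# state after each chunk so intermediate work is materialized incrementally.
# This is the loop the commit repoints from `cache` to `prompt_cache`.
while y.size > prefill_step_size:
    dummy_model(y[:prefill_step_size][None], cache=prompt_cache)
    mx.eval([c.state for c in prompt_cache])
    y = y[prefill_step_size:]

# The leftover tokens (fewer than prefill_step_size) go to the first decode step.
print("remaining tokens for the first decode step:", y.tolist())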