clear cache during prompt processing (#1027)

2025-10-23 22:18:06 +08:00 · 2024-10-09 16:48:32 -07:00
parent b7373cb44f
commit 4360e7ccec
1 changed file with 1 addition and 0 deletions
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -242,6 +242,7 @@ def generate_step(
        model(y[:prefill_step_size][None], cache=prompt_cache)
        mx.eval([c.state for c in prompt_cache])
        y = y[prefill_step_size:]
+        mx.metal.clear_cache()

    y, logprobs = _step(y)