From 4360e7ccec3d2dd1a2ac96509c74afcaa5e80a95 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Wed, 9 Oct 2024 16:48:32 -0700
Subject: [PATCH] clear cache during prompt processing (#1027)

---
 llms/mlx_lm/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index cfbcf29e..1e07546e 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -242,6 +242,7 @@ def generate_step(
         model(y[:prefill_step_size][None], cache=prompt_cache)
         mx.eval([c.state for c in prompt_cache])
         y = y[prefill_step_size:]
+        mx.metal.clear_cache()
 
     y, logprobs = _step(y)
 