From 4360e7ccec3d2dd1a2ac96509c74afcaa5e80a95 Mon Sep 17 00:00:00 2001
From: Awni Hannun <awni@apple.com>
Date: Wed, 9 Oct 2024 16:48:32 -0700
Subject: [PATCH] clear cache during prompt processing (#1027)

---
 llms/mlx_lm/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index cfbcf29e..1e07546e 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -242,6 +242,7 @@ def generate_step(
         model(y[:prefill_step_size][None], cache=prompt_cache)
         mx.eval([c.state for c in prompt_cache])
         y = y[prefill_step_size:]
+        mx.metal.clear_cache()
 
     y, logprobs = _step(y)