From 9a6e6541deeb4a78a19b7523d3d0468430296f4b Mon Sep 17 00:00:00 2001 From: Shunta Saito Date: Thu, 13 Feb 2025 13:44:31 +0900 Subject: [PATCH] Fix cache.py to support non-top level layers --- llms/mlx_lm/models/cache.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llms/mlx_lm/models/cache.py b/llms/mlx_lm/models/cache.py index 14026f0c..3083723a 100644 --- a/llms/mlx_lm/models/cache.py +++ b/llms/mlx_lm/models/cache.py @@ -26,7 +26,10 @@ def make_prompt_cache( if hasattr(model, "make_cache"): return model.make_cache() - num_layers = len(model.layers) + if hasattr(model, "layers"): + num_layers = len(model.layers) + else: + num_layers = len(model.model.layers) if max_kv_size is not None: return [ RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)