From 9a6e6541deeb4a78a19b7523d3d0468430296f4b Mon Sep 17 00:00:00 2001
From: Shunta Saito <shunta.saito@gmail.com>
Date: Thu, 13 Feb 2025 13:44:31 +0900
Subject: [PATCH] Fix cache.py to support non-top-level layers

---
 llms/mlx_lm/models/cache.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llms/mlx_lm/models/cache.py b/llms/mlx_lm/models/cache.py
index 14026f0c..3083723a 100644
--- a/llms/mlx_lm/models/cache.py
+++ b/llms/mlx_lm/models/cache.py
@@ -26,7 +26,10 @@ def make_prompt_cache(
     if hasattr(model, "make_cache"):
         return model.make_cache()
 
-    num_layers = len(model.layers)
+    if hasattr(model, "layers"):
+        num_layers = len(model.layers)
+    else:
+        num_layers = len(model.model.layers)
     if max_kv_size is not None:
         return [
             RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)