Fix cache.py to support non-top-level layers

2025-08-29 06:22:46 +08:00 · 2025-02-13 13:44:31 +09:00 · 2025-02-13 13:44:31 +09:00 · 9a6e6541de
commit 9a6e6541de
parent 58686bbcac
1 changed file with 4 additions and 1 deletion
--- a/llms/mlx_lm/models/cache.py
+++ b/llms/mlx_lm/models/cache.py
@@ -26,7 +26,10 @@ def make_prompt_cache(
    if hasattr(model, "make_cache"):
        return model.make_cache()

-    num_layers = len(model.layers)
+    if hasattr(model, "layers"):
+        num_layers = len(model.layers)
+    else:
+        num_layers = len(model.model.layers)
    if max_kv_size is not None:
        return [
            RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)