Fix rotating kv cache size (#1093)

This commit is contained in:
Angelos Katharopoulos
2024-11-05 10:24:24 -08:00
committed by GitHub
parent 6fd1f70f73
commit ed9e81dd58
2 changed files with 3 additions and 3 deletions

View File

@@ -42,7 +42,7 @@ def create_attention_mask(h: mx.array, cache: Optional[Any] = None):
     if cache is not None and cache[0] is not None:
         c = cache[0]
         if hasattr(c, "max_size"):
-            offset = min(c.max_size - 1, c.offset)
+            offset = min(c.max_size, c.offset)
             window_size = c.max_size
         else:
             offset = c.offset