mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-03 23:14:34 +08:00
Fix rotating kv cache size (#1093)
This commit is contained in:

committed by
GitHub

parent
6fd1f70f73
commit
ed9e81dd58
@@ -325,9 +325,9 @@ class RotatingKVCache(_BaseCache):
 self.keys = self._temporal_order(self.keys)
 self.values = self._temporal_order(self.values)
 
-# The largest size is self.max_size + S - 1 to ensure
+# The largest size is self.max_size + S to ensure
 # every token gets at least self.max_size context
-trim_size = self._idx - self.max_size + 1
+trim_size = self._idx - self.max_size
 self.keys = self._trim(trim_size, self.keys, keys)
 self.values = self._trim(trim_size, self.values, values)
 self.offset += keys.shape[2]
Reference in New Issue
Block a user