mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 06:22:46 +08:00
Fix cache.py to support non-top-level layers
This commit is contained in:
parent
58686bbcac
commit
9a6e6541de
@@ -26,7 +26,10 @@ def make_prompt_cache(
|
||||
if hasattr(model, "make_cache"):
|
||||
return model.make_cache()
|
||||
|
||||
num_layers = len(model.layers)
|
||||
if hasattr(model, "layers"):
|
||||
num_layers = len(model.layers)
|
||||
else:
|
||||
num_layers = len(model.model.layers)
|
||||
if max_kv_size is not None:
|
||||
return [
|
||||
RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)
|
||||
|
Loading…
Reference in New Issue
Block a user