mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 16:58:45 +08:00
Fix cache.py to support non-top-level layers
This commit is contained in:
parent
58686bbcac
commit
9a6e6541de
@@ -26,7 +26,10 @@ def make_prompt_cache(
|
|||||||
if hasattr(model, "make_cache"):
|
if hasattr(model, "make_cache"):
|
||||||
return model.make_cache()
|
return model.make_cache()
|
||||||
|
|
||||||
num_layers = len(model.layers)
|
if hasattr(model, "layers"):
|
||||||
|
num_layers = len(model.layers)
|
||||||
|
else:
|
||||||
|
num_layers = len(model.model.layers)
|
||||||
if max_kv_size is not None:
|
if max_kv_size is not None:
|
||||||
return [
|
return [
|
||||||
RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)
|
RotatingKVCache(max_size=max_kv_size, keep=4) for _ in range(num_layers)
|
||||||
|
Loading…
Reference in New Issue
Block a user