mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
Handle longer prompt/generation (#931)
* rebase
* nits
* nit
* fix rotating cache with step prefill
* update version
This commit is contained in:
@@ -76,7 +76,12 @@ def setup_arg_parser():
|
||||
type=int,
|
||||
default=None,
|
||||
help="Set the MLX cache limit in GB",
|
||||
required=False,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-kv-size",
|
||||
type=int,
|
||||
default=1024,
|
||||
help="Set the maximum key-value cache size",
|
||||
)
|
||||
return parser
|
||||
|
||||
@@ -154,6 +159,7 @@ def main():
|
||||
formatter=formatter,
|
||||
temp=args.temp,
|
||||
top_p=args.top_p,
|
||||
max_kv_size=args.max_kv_size,
|
||||
)
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user