mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-30 02:53:41 +08:00
add option to wire model
This commit is contained in:
parent
743763bc2e
commit
645423946a
@ -107,6 +107,14 @@ def setup_arg_parser():
|
|||||||
default=None,
|
default=None,
|
||||||
help="A file containing saved KV caches to avoid recomputing them",
|
help="A file containing saved KV caches to avoid recomputing them",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--wire-model",
|
||||||
|
"-w",
|
||||||
|
action="store_true",
|
||||||
|
help=("Keep the model resident in memory. This can substantially "
|
||||||
|
"speedup generation for models large relative to the machine's RAM.")
|
||||||
|
)
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@ -216,6 +224,14 @@ def main():
|
|||||||
raise ValueError("Cannot use --colorize with --verbose=False")
|
raise ValueError("Cannot use --colorize with --verbose=False")
|
||||||
formatter = colorprint_by_t0 if args.colorize else None
|
formatter = colorprint_by_t0 if args.colorize else None
|
||||||
|
|
||||||
|
if args.wire_model:
|
||||||
|
wired_bytes = mx.metal.wire(model)
|
||||||
|
if wired_bytes >= mx.metal.device_info()["max_recommended_working_set_size"]:
|
||||||
|
raise ValueError(
|
||||||
|
"Cannot wire a model larger than the available RAM. You may "
|
||||||
|
"be able to increase the available RAM by setting "
|
||||||
|
"`sudo sysctl iogpu.wired_limit_mb=N` to a larger value")
|
||||||
|
|
||||||
response = generate(
|
response = generate(
|
||||||
model,
|
model,
|
||||||
tokenizer,
|
tokenizer,
|
||||||
|
Loading…
Reference in New Issue
Block a user