mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 09:21:18 +08:00
Add MLX Cache Limit setting for mlx_lm.generate and mlx_lm.server CLI (#744)
* Add support for setting MLX cache limit in GB
* Add support for setting MLX cache limit in GB in mlx_lm.server
* format

---------

Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
parent
b468091f7f
commit
d1c35fa684
@@ -71,6 +71,13 @@ def setup_arg_parser():
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Colorize output based on T[0] probability",
|
help="Colorize output based on T[0] probability",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--cache-limit-gb",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Set the MLX cache limit in GB",
|
||||||
|
required=False,
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
@@ -107,6 +114,9 @@ def main():
|
|||||||
|
|
||||||
mx.random.seed(args.seed)
|
mx.random.seed(args.seed)
|
||||||
|
|
||||||
|
if args.cache_limit_gb is not None:
|
||||||
|
mx.metal.set_cache_limit(args.cache_limit_gb * 1024 * 1024 * 1024)
|
||||||
|
|
||||||
# Building tokenizer_config
|
# Building tokenizer_config
|
||||||
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
|
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
|
||||||
if args.eos_token is not None:
|
if args.eos_token is not None:
|
||||||
|
@@ -520,6 +520,13 @@ def main():
|
|||||||
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
||||||
help="Set the logging level (default: INFO)",
|
help="Set the logging level (default: INFO)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--cache-limit-gb",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Set the MLX cache limit in GB",
|
||||||
|
required=False,
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -527,6 +534,10 @@ def main():
|
|||||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if args.cache_limit_gb is not None:
|
||||||
|
logging.debug(f"Setting cache limit to {args.cache_limit_gb} GB")
|
||||||
|
mx.metal.set_cache_limit(args.cache_limit_gb * 1024 * 1024 * 1024)
|
||||||
|
|
||||||
# Building tokenizer_config
|
# Building tokenizer_config
|
||||||
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
|
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user