diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py index 0355ca29..29976da2 100644 --- a/llms/mlx_lm/generate.py +++ b/llms/mlx_lm/generate.py @@ -90,12 +90,6 @@ def setup_arg_parser(): action="store_true", help="Colorize output based on T[0] probability", ) - parser.add_argument( - "--cache-limit-gb", - type=int, - default=None, - help="Set the MLX cache limit in GB", - ) parser.add_argument( "--max-kv-size", type=int, @@ -164,9 +158,6 @@ def main(): mx.random.seed(args.seed) - if args.cache_limit_gb is not None: - mx.metal.set_cache_limit(args.cache_limit_gb * 1024 * 1024 * 1024) - # Load the prompt cache and metadata if a cache file is provided using_cache = args.prompt_cache_file is not None if using_cache: