diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 0355ca29..29976da2 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -90,12 +90,6 @@ def setup_arg_parser():
         action="store_true",
         help="Colorize output based on T[0] probability",
     )
-    parser.add_argument(
-        "--cache-limit-gb",
-        type=int,
-        default=None,
-        help="Set the MLX cache limit in GB",
-    )
     parser.add_argument(
         "--max-kv-size",
         type=int,
@@ -164,9 +158,6 @@ def main():
 
     mx.random.seed(args.seed)
 
-    if args.cache_limit_gb is not None:
-        mx.metal.set_cache_limit(args.cache_limit_gb * 1024 * 1024 * 1024)
-
     # Load the prompt cache and metadata if a cache file is provided
     using_cache = args.prompt_cache_file is not None
     if using_cache: