diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index 5bcfb829..55abb338 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -80,7 +80,7 @@ def build_parser():
     parser.add_argument(
         "--lora-layers",
         type=int,
-        help="Number of layers to fine-tune",
+        help="Number of layers to fine-tune. Default is 16, use -1 for all.",
     )
     parser.add_argument("--batch-size", type=int, help="Minibatch size.")
     parser.add_argument("--iters", type=int, help="Iterations to train for.")
@@ -143,7 +143,7 @@ def build_parser():
         help="Use gradient checkpointing to reduce memory use.",
         default=None,
     )
-    parser.add_argument("--seed", type=int, default=0, help="The PRNG seed")
+    parser.add_argument("--seed", type=int, default=None, help="The PRNG seed")
     parser.add_argument(
         "--use-dora", action="store_true", default=None, help="Use DoRA to finetune."
    )
@@ -268,7 +268,7 @@ def main():
             config = yaml.load(file, yaml_loader)
         # Prefer parameters from command-line arguments
         for k, v in config.items():
-            if args.get(k, None) is not None:
+            if args.get(k, None) is None:
                 args[k] = v
 
     # Update defaults for unspecified parameters
diff --git a/llms/mlx_lm/requirements.txt b/llms/mlx_lm/requirements.txt
index 4e9ab42d..226ac053 100644
--- a/llms/mlx_lm/requirements.txt
+++ b/llms/mlx_lm/requirements.txt
@@ -1,4 +1,4 @@
-mlx>=0.11
+mlx>=0.13.1
 numpy
 transformers>=4.39.3
 protobuf
diff --git a/llms/mlx_lm/tuner/utils.py b/llms/mlx_lm/tuner/utils.py
index 03f782a1..cc085d78 100644
--- a/llms/mlx_lm/tuner/utils.py
+++ b/llms/mlx_lm/tuner/utils.py
@@ -54,6 +54,10 @@ def linear_to_lora_layers(
     """
     num_layers = len(model.layers)
+
+    if num_lora_layers < 0:
+        num_lora_layers = num_layers
+
     if num_lora_layers > num_layers:
         raise ValueError(
             f"Requested {num_lora_layers} LoRA layers "
diff --git a/llms/mlx_lm/version.py b/llms/mlx_lm/version.py
index 70b7614f..be00b8da 100644
--- a/llms/mlx_lm/version.py
+++ b/llms/mlx_lm/version.py
@@ -1,3 +1,3 @@
 # Copyright © 2023-2024 Apple Inc.
 
-__version__ = "0.13.1"
+__version__ = "0.14.0"
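
Below is a minimal, self-contained sketch (not part of the patch) of the two behavioral changes above, using placeholder values: with the flipped `is None` check in main(), config values only fill in options left unset on the command line, and with the new branch in linear_to_lora_layers, a negative --lora-layers expands to every layer of the model.

# Hypothetical illustration only; cli_args, config, and num_layers below are
# placeholder values, not the mlx_lm API.
cli_args = {"lora_layers": -1, "seed": None, "batch_size": 4}
config = {"lora_layers": 8, "seed": 42, "iters": 1000}

# Config fills in only the options the user did not pass on the command line
# (the corrected `is None` check in main()).
for k, v in config.items():
    if cli_args.get(k, None) is None:
        cli_args[k] = v

# A negative --lora-layers now means "all layers" (the new branch in
# linear_to_lora_layers).
num_layers = 32  # stands in for len(model.layers)
num_lora_layers = cli_args["lora_layers"]
if num_lora_layers < 0:
    num_lora_layers = num_layers

print(cli_args)         # {'lora_layers': -1, 'seed': 42, 'batch_size': 4, 'iters': 1000}
print(num_lora_layers)  # 32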