diff --git a/llms/mlx_lm/LORA.md b/llms/mlx_lm/LORA.md
index 74ec07f6..22ca3ad7 100644
--- a/llms/mlx_lm/LORA.md
+++ b/llms/mlx_lm/LORA.md
@@ -23,14 +23,24 @@ LoRA (QLoRA).[^qlora] LoRA fine-tuning works with the following model families:
 
 ## Run
 
-The main command is `mlx_lm.lora`. To see a full list of options run:
+The main command is `mlx_lm.lora`. To see a full list of command-line options run:
 
 ```shell
 python -m mlx_lm.lora --help
 ```
 
 Note, in the following the `--model` argument can be any compatible Hugging
-Face repo or a local path to a converted model.
+Face repo or a local path to a converted model. 
+
+You can also specify a YAML config with `-c`/`--config`. For more on the format see the
+[example YAML](examples/lora_config.yaml). For example:
+
+```shell
+python -m mlx_lm.lora --config /path/to/config.yaml
+```
+
+If command-line flags are also used, they will override the corresponding
+values in the config.
 
 ### Fine-tune
 
@@ -74,7 +84,7 @@ python -m mlx_lm.lora \
 
 ### Generate
 
-For generation use mlx_lm.generate:
+For generation use `mlx_lm.generate`:
 
 ```shell
 python -m mlx_lm.generate \
diff --git a/llms/mlx_lm/examples/lora_config.yaml b/llms/mlx_lm/examples/lora_config.yaml
new file mode 100644
index 00000000..dc324358
--- /dev/null
+++ b/llms/mlx_lm/examples/lora_config.yaml
@@ -0,0 +1,50 @@
+# The path to the local model directory or Hugging Face repo.
+model: "mlx_model"
+
+# Whether or not to train (boolean)
+train: true
+
+# Directory with {train, valid, test}.jsonl files
+data: "/path/to/training/data"
+
+# The PRNG seed
+seed: 0
+
+# Number of layers to fine-tune
+lora_layers: 16
+
+# Minibatch size.
+batch_size: 4
+
+# Iterations to train for.
+iters: 100
+
+# Number of validation batches, -1 uses the entire validation set.
+val_batches: 25
+
+# Adam learning rate.
+learning_rate: 1e-5
+
+# Number of training steps between loss reporting.
+steps_per_report: 10
+
+# Number of training steps between validations.
+steps_per_eval: 200
+
+# Load path to resume training with the given adapter weights.
+resume_adapter_file: null
+
+# Save/load path for the trained adapter weights.
+adapter_file: "adapters.npz"
+
+# Save the model every N iterations.
+save_every: 100
+
+# Evaluate on the test set after training
+test: false
+
+# Number of test set batches, -1 uses the entire test set.
+test_batches: 500
+
+# Maximum sequence length.
+max_seq_length: 2048
diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index d316efe4..5f57eb04 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -1,22 +1,61 @@
 import argparse
 import json
 import math
+import re
+import types
 from pathlib import Path
 
 import mlx.optimizers as optim
 import numpy as np
+import yaml
 from mlx.utils import tree_flatten
 
 from .tuner.trainer import TrainingArgs, TrainingCallback, evaluate, train
 from .tuner.utils import linear_to_lora_layers
 from .utils import load
 
+yaml_loader = yaml.SafeLoader
+yaml_loader.add_implicit_resolver(
+    "tag:yaml.org,2002:float",
+    re.compile(
+        """^(?:
+     [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
+    |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
+    |\\.[0-9_]+(?:[eE][-+][0-9]+)?
+    |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
+    |[-+]?\\.(?:inf|Inf|INF)
+    |\\.(?:nan|NaN|NAN))$""",
+        re.X,
+    ),
+    list("-+0123456789."),
+)
+
+
+CONFIG_DEFAULTS = {
+    "model": "mlx_model",
+    "train": False,
+    "data": "data/",
+    "seed": 0,
+    "lora_layers": 16,
+    "batch_size": 4,
+    "iters": 1000,
+    "val_batches": 25,
+    "learning_rate": 1e-5,
+    "steps_per_report": 10,
+    "steps_per_eval": 200,
+    "resume_adapter_file": None,
+    "adapter_file": "adapters.npz",
+    "save_every": 100,
+    "test": False,
+    "test_batches": 500,
+    "max_seq_length": 2048,
+}
+
 
 def build_parser():
     parser = argparse.ArgumentParser(description="LoRA or QLoRA finetuning.")
     parser.add_argument(
         "--model",
-        default="mlx_model",
         help="The path to the local model directory or Hugging Face repo.",
     )
     # Generation args
@@ -24,18 +63,14 @@ def build_parser():
         "--max-tokens",
         "-m",
         type=int,
-        default=100,
         help="The maximum number of tokens to generate",
     )
-    parser.add_argument(
-        "--temp", type=float, default=0.8, help="The sampling temperature"
-    )
+    parser.add_argument("--temp", type=float, help="The sampling temperature")
     parser.add_argument(
         "--prompt",
         "-p",
         type=str,
         help="The prompt for generation",
-        default=None,
     )
 
     # Training args
@@ -47,56 +82,44 @@ def build_parser():
     parser.add_argument(
         "--data",
         type=str,
-        default="data/",
         help="Directory with {train, valid, test}.jsonl files",
     )
     parser.add_argument(
         "--lora-layers",
         type=int,
-        default=16,
         help="Number of layers to fine-tune",
     )
-    parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
-    parser.add_argument(
-        "--iters", type=int, default=1000, help="Iterations to train for."
-    )
+    parser.add_argument("--batch-size", type=int, help="Minibatch size.")
+    parser.add_argument("--iters", type=int, help="Iterations to train for.")
     parser.add_argument(
         "--val-batches",
         type=int,
-        default=25,
         help="Number of validation batches, -1 uses the entire validation set.",
     )
-    parser.add_argument(
-        "--learning-rate", type=float, default=1e-5, help="Adam learning rate."
-    )
+    parser.add_argument("--learning-rate", type=float, help="Adam learning rate.")
     parser.add_argument(
         "--steps-per-report",
         type=int,
-        default=10,
         help="Number of training steps between loss reporting.",
     )
     parser.add_argument(
         "--steps-per-eval",
         type=int,
-        default=200,
         help="Number of training steps between validations.",
     )
     parser.add_argument(
         "--resume-adapter-file",
         type=str,
-        default=None,
         help="Load path to resume training with the given adapter weights.",
     )
     parser.add_argument(
         "--adapter-file",
         type=str,
-        default="adapters.npz",
         help="Save/load path for the trained adapter weights.",
     )
     parser.add_argument(
         "--save-every",
         type=int,
-        default=100,
         help="Save the model every N iterations.",
     )
     parser.add_argument(
@@ -107,16 +130,20 @@ def build_parser():
     parser.add_argument(
         "--test-batches",
         type=int,
-        default=500,
         help="Number of test set batches, -1 uses the entire test set.",
     )
     parser.add_argument(
         "--max-seq-length",
         type=int,
-        default=2048,
         help="Maximum sequence length.",
     )
-    parser.add_argument("--seed", type=int, default=0, help="The PRNG seed")
+    parser.add_argument(
+        "-c",
+        "--config",
+        default=None,
+        help="A YAML configuration file with the training options",
+    )
+    parser.add_argument("--seed", type=int, help="The PRNG seed")
     return parser
 
 
@@ -242,5 +269,19 @@ def run(args, training_callback: TrainingCallback = None):
 if __name__ == "__main__":
     parser = build_parser()
     args = parser.parse_args()
+    config = args.config
+    args = vars(args)
+    if config:
+        print("Loading configuration file", config)
+        with open(config, "r") as file:
+            config = yaml.load(file, yaml_loader)
+        # Prefer parameters from command-line arguments
+        for k, v in config.items():
+            if not args.get(k, None):
+                args[k] = v
-    run(args)
+    # Update defaults for unspecified parameters
+    for k, v in CONFIG_DEFAULTS.items():
+        if not args.get(k, None):
+            args[k] = v
+    run(types.SimpleNamespace(**args))
 
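
A note on the new `yaml_loader`: PyYAML's stock `SafeLoader` only resolves a scalar to a float when it contains a decimal point, so an exponent-only value such as `1e-5` in the config would otherwise load as the string `"1e-5"`. The implicit resolver registered at the top of `lora.py` widens the pattern to cover that form. The snippet below is an illustrative sketch of that behavior (it is not part of the patch) and only assumes PyYAML is installed:

```python
import re

import yaml

# Stock SafeLoader: the YAML 1.1 float pattern requires a '.', so an
# exponent-only literal is left as a plain string.
print(yaml.safe_load("learning_rate: 1e-5"))  # {'learning_rate': '1e-5'}

# Register the broader resolver, mirroring the patch. Note that this mutates
# SafeLoader globally, exactly as the module-level code in lora.py does.
yaml_loader = yaml.SafeLoader
yaml_loader.add_implicit_resolver(
    "tag:yaml.org,2002:float",
    re.compile(
        """^(?:
     [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)?
    |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+)
    |\\.[0-9_]+(?:[eE][-+][0-9]+)?
    |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*
    |[-+]?\\.(?:inf|Inf|INF)
    |\\.(?:nan|NaN|NAN))$""",
        re.X,
    ),
    list("-+0123456789."),
)

# With the resolver in place, the same scalar parses as a float.
print(yaml.load("learning_rate: 1e-5", yaml_loader))  # {'learning_rate': 1e-05}
```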
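
The precedence described in the README hunk (explicit command-line flags win, then config values, then `CONFIG_DEFAULTS`) comes from the two merge loops added to `__main__`. The standalone sketch below mirrors that logic with made-up values so the ordering is easy to see; the concrete names and numbers are illustrative, not part of the patch:

```python
import types

import yaml

# Pretend the user passed --batch-size 8 and nothing else on the command line;
# options left unspecified come back as None since the flags no longer set defaults.
args = {"batch_size": 8, "learning_rate": None, "iters": None}

# Pretend this came from a file passed via --config.
config = yaml.safe_load("batch_size: 4\nlearning_rate: 1.0e-6\n")

CONFIG_DEFAULTS = {"batch_size": 4, "learning_rate": 1e-5, "iters": 1000}

# Prefer parameters from command-line arguments (same loop as the patch).
for k, v in config.items():
    if not args.get(k, None):
        args[k] = v

# Update defaults for unspecified parameters.
for k, v in CONFIG_DEFAULTS.items():
    if not args.get(k, None):
        args[k] = v

args = types.SimpleNamespace(**args)
print(args.batch_size)     # 8     -> CLI flag overrides the config value
print(args.learning_rate)  # 1e-06 -> filled in from the config
print(args.iters)          # 1000  -> falls back to CONFIG_DEFAULTS
```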