more nits

Awni Hannun 2023-12-15 10:06:14 -08:00
parent fa51553f09
commit d108c558fc
3 changed files with 27 additions and 25 deletions

@@ -42,19 +42,21 @@ from Meta.
Convert the model with:
```
-python convert.py <path_to_torch_model> <path_to_mlx_model>
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
```
## Run
-#### Fine-tune
The main script is `lora.py`. To see a full list of options run
```
python lora.py --help
```
+### Fine-tune
To fine-tune a model use:
```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
`params.json` configuration which will all be output by the `convert.py` script.
By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
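For illustration only (not part of this diff), resuming a run with the renamed flag might look like this, using the default `adapters.npz` save path mentioned above:

```
python lora.py --model <path_to_model> \
    --train \
    --resume-adapter-file adapters.npz
```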
-#### Evaluate
+### Evaluate
To compute test set perplexity use
```
python lora.py --model <path_to_model> \
-    --adapter_file <path_to_adapters.npz> \
+    --adapter-file <path_to_adapters.npz> \
    --test
```
-#### Generate
+### Generate
For generation use
```
python lora.py --model <path_to_model> \
-    --adapter_file <path_to_adapters.npz> \
+    --adapter-file <path_to_adapters.npz> \
    --num-tokens 50 \
    --prompt "table: 1-10015132-16
columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify the
dataset with `--data=<my_data_directory>`. Check the subdirectory `data/` to
see the expected format.
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
```
{"text": "This is an example for the model."}

@@ -14,13 +14,13 @@ if __name__ == "__main__":
description="Convert Mistral or Llama models to MLX.",
)
parser.add_argument(
"--torch_model",
"--torch-model",
type=str,
default="mistral-7B-v0.1/",
help="The torch model directory",
)
parser.add_argument(
"--mlx_model",
"--mlx-model",
type=str,
default="mlx-mistral-7B-v0.1/",
help="The directory to store the mlx model",

@@ -58,44 +58,44 @@ def build_parser():
help="Directory with {train, valid, test}.jsonl files",
)
parser.add_argument(
"--lora_layers",
"--lora-layers",
type=int,
default=16,
help="Number of layers to fine-tune",
)
parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
parser.add_argument(
"--iters", type=int, default=1000, help="Iterations to train for."
)
parser.add_argument(
"--val_batches",
"--val-batches",
type=int,
default=25,
help="Number of validation batches, -1 uses the entire validation set.",
)
parser.add_argument(
"--learning_rate", type=float, default=1e-5, help="Adam learning rate."
"--learning-rate", type=float, default=1e-5, help="Adam learning rate."
)
parser.add_argument(
"--steps_per_report",
"--steps-per-report",
type=int,
default=10,
help="Number of training steps between loss reporting.",
)
parser.add_argument(
"--steps_per_eval",
"--steps-per-eval",
type=int,
default=200,
help="Number of training steps between validations.",
)
parser.add_argument(
"--resume_adapter_file",
"--resume-adapter-file",
type=str,
default=None,
help="Load path to resume training with the given adapter weights.",
)
parser.add_argument(
"--adapter_file",
"--adapter-file",
type=str,
default="adapters.npz",
help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
help="Evaluate on the test set after training",
)
parser.add_argument(
"--test_batches",
"--test-batches",
type=int,
default=500,
help="Number of test set batches, -1 uses the entire test set.",