From d108c558fccc4acf964530067db00e051a9d4669 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Fri, 15 Dec 2023 10:06:14 -0800
Subject: [PATCH] more nits

---
 lora/README.md  | 30 ++++++++++++++++--------------
 lora/convert.py |  4 ++--
 lora/lora.py    | 18 +++++++++---------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/lora/README.md b/lora/README.md
index cf50bf66..2be2d78d 100644
--- a/lora/README.md
+++ b/lora/README.md
@@ -42,19 +42,21 @@ from Meta.
 Convert the model with:
 
 ```
-python convert.py
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
 ```
 
 ## Run
 
-#### Fine-tune
-
 The main script is `lora.py`. To see a full list of options run
 
 ```
 python lora.py --help
 ```
 
+### Fine-tune
+
 To fine-tune a model use:
 
 ```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
 `params.json` configuration which will all be output by the `convert.py` script.
 
 By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
 
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
 
-#### Evaluate
+### Evaluate
 
 To compute test set perplexity use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --test
 ```
 
-#### Generate
+### Generate
 
 For generation use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --num-tokens 50 \
                --prompt "table: 1-10015132-16
 columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify
 the dataset with `--data=<my_data_directory>`. Check the subdirectory `data/`
 to see the expected format.
 
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
 
 ```
 {"text": "This is an example for the model."}
diff --git a/lora/convert.py b/lora/convert.py
index cc29f7dc..16af7931 100644
--- a/lora/convert.py
+++ b/lora/convert.py
@@ -14,13 +14,13 @@ if __name__ == "__main__":
         description="Convert Mistral or Llama models to MLX.",
     )
     parser.add_argument(
-        "--torch_model",
+        "--torch-model",
         type=str,
         default="mistral-7B-v0.1/",
         help="The torch model directory",
     )
     parser.add_argument(
-        "--mlx_model",
+        "--mlx-model",
         type=str,
         default="mlx-mistral-7B-v0.1/",
         help="The directory to store the mlx model",
diff --git a/lora/lora.py b/lora/lora.py
index e957ac05..997b14cb 100644
--- a/lora/lora.py
+++ b/lora/lora.py
@@ -58,44 +58,44 @@ def build_parser():
         help="Directory with {train, valid, test}.jsonl files",
     )
     parser.add_argument(
-        "--lora_layers",
+        "--lora-layers",
         type=int,
         default=16,
         help="Number of layers to fine-tune",
     )
-    parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
+    parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
     parser.add_argument(
         "--iters", type=int, default=1000, help="Iterations to train for."
     )
     parser.add_argument(
-        "--val_batches",
+        "--val-batches",
         type=int,
         default=25,
         help="Number of validation batches, -1 uses the entire validation set.",
     )
     parser.add_argument(
-        "--learning_rate", type=float, default=1e-5, help="Adam learning rate."
+        "--learning-rate", type=float, default=1e-5, help="Adam learning rate."
     )
     parser.add_argument(
-        "--steps_per_report",
+        "--steps-per-report",
         type=int,
         default=10,
         help="Number of training steps between loss reporting.",
     )
     parser.add_argument(
-        "--steps_per_eval",
+        "--steps-per-eval",
         type=int,
         default=200,
         help="Number of training steps between validations.",
     )
     parser.add_argument(
-        "--resume_adapter_file",
+        "--resume-adapter-file",
         type=str,
         default=None,
         help="Load path to resume training with the given adapter weights.",
     )
     parser.add_argument(
-        "--adapter_file",
+        "--adapter-file",
         type=str,
         default="adapters.npz",
         help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
         help="Evaluate on the test set after training",
     )
     parser.add_argument(
-        "--test_batches",
+        "--test-batches",
         type=int,
         default=500,
         help="Number of test set batches, -1 uses the entire test set.",