From d108c558fccc4acf964530067db00e051a9d4669 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Fri, 15 Dec 2023 10:06:14 -0800
Subject: [PATCH] more nits

---
 lora/README.md  | 30 ++++++++++++++++--------------
 lora/convert.py |  4 ++--
 lora/lora.py    | 18 +++++++++---------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/lora/README.md b/lora/README.md
index cf50bf66..2be2d78d 100644
--- a/lora/README.md
+++ b/lora/README.md
@@ -42,19 +42,21 @@ from Meta.
 Convert the model with:
 
 ```
-python convert.py
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
 ```
 
 ## Run
 
-#### Fine-tune
-
 The main script is `lora.py`. To see a full list of options run
 
 ```
 python lora.py --help
 ```
 
+### Fine-tune
+
 To fine-tune a model use:
 
 ```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
 `params.json` configuration which will all be output by the `convert.py` script.
 
 By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
 
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
 
-#### Evaluate
+### Evaluate
 
 To compute test set perplexity use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --test
 ```
 
-#### Generate
+### Generate
 
 For generation use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --num-tokens 50 \
                --prompt "table: 1-10015132-16
 columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify
 the dataset with `--data=<my_data_directory>`. Check the subdirectory `data/`
 to see the expected format.
 
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
 
 ```
 {"text": "This is an example for the model."}
diff --git a/lora/convert.py b/lora/convert.py
index cc29f7dc..16af7931 100644
--- a/lora/convert.py
+++ b/lora/convert.py
@@ -14,13 +14,13 @@ if __name__ == "__main__":
         description="Convert Mistral or Llama models to MLX.",
     )
     parser.add_argument(
-        "--torch_model",
+        "--torch-model",
         type=str,
         default="mistral-7B-v0.1/",
         help="The torch model directory",
     )
     parser.add_argument(
-        "--mlx_model",
+        "--mlx-model",
         type=str,
         default="mlx-mistral-7B-v0.1/",
         help="The directory to store the mlx model",
diff --git a/lora/lora.py b/lora/lora.py
index e957ac05..997b14cb 100644
--- a/lora/lora.py
+++ b/lora/lora.py
@@ -58,44 +58,44 @@ def build_parser():
         help="Directory with {train, valid, test}.jsonl files",
     )
     parser.add_argument(
-        "--lora_layers",
+        "--lora-layers",
         type=int,
         default=16,
         help="Number of layers to fine-tune",
     )
-    parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
+    parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
     parser.add_argument(
         "--iters", type=int, default=1000, help="Iterations to train for."
     )
     parser.add_argument(
-        "--val_batches",
+        "--val-batches",
         type=int,
         default=25,
         help="Number of validation batches, -1 uses the entire validation set.",
     )
     parser.add_argument(
-        "--learning_rate", type=float, default=1e-5, help="Adam learning rate."
+        "--learning-rate", type=float, default=1e-5, help="Adam learning rate."
     )
     parser.add_argument(
-        "--steps_per_report",
+        "--steps-per-report",
         type=int,
         default=10,
         help="Number of training steps between loss reporting.",
     )
     parser.add_argument(
-        "--steps_per_eval",
+        "--steps-per-eval",
         type=int,
         default=200,
         help="Number of training steps between validations.",
     )
     parser.add_argument(
-        "--resume_adapter_file",
+        "--resume-adapter-file",
         type=str,
         default=None,
         help="Load path to resume training with the given adapter weights.",
     )
     parser.add_argument(
-        "--adapter_file",
+        "--adapter-file",
         type=str,
         default="adapters.npz",
         help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
         help="Evaluate on the test set after training",
     )
     parser.add_argument(
-        "--test_batches",
+        "--test-batches",
         type=int,
         default=500,
         help="Number of test set batches, -1 uses the entire test set.",