more nits

Awni Hannun 2023-12-15 10:06:14 -08:00
parent fa51553f09
commit d108c558fc
3 changed files with 27 additions and 25 deletions


@@ -42,19 +42,21 @@ from Meta.
 Convert the model with:
 
 ```
-python convert.py <path_to_torch_model> <path_to_mlx_model>
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
 ```
 
 ## Run
 
-#### Fine-tune
-
 The main script is `lora.py`. To see a full list of options run
 
 ```
 python lora.py --help
 ```
 
+### Fine-tune
+
 To fine-tune a model use:
 
 ```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
 `params.json` configuration which will all be output by the `convert.py` script.
 
 By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
 
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
 
-#### Evaluate
+### Evaluate
 
 To compute test set perplexity use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --test
 ```
 
-#### Generate
+### Generate
 
 For generation use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --num-tokens 50 \
                --prompt "table: 1-10015132-16
 columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify the
 dataset with `--data=<my_data_directory>`. Check the subdirectory `data/` to
 see the expected format.
 
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
 
 ```
 {"text": "This is an example for the model."}


@@ -14,13 +14,13 @@ if __name__ == "__main__":
         description="Convert Mistral or Llama models to MLX.",
     )
     parser.add_argument(
-        "--torch_model",
+        "--torch-model",
         type=str,
         default="mistral-7B-v0.1/",
         help="The torch model directory",
     )
     parser.add_argument(
-        "--mlx_model",
+        "--mlx-model",
         type=str,
         default="mlx-mistral-7B-v0.1/",
         help="The directory to store the mlx model",


@@ -58,44 +58,44 @@ def build_parser():
         help="Directory with {train, valid, test}.jsonl files",
     )
     parser.add_argument(
-        "--lora_layers",
+        "--lora-layers",
         type=int,
         default=16,
         help="Number of layers to fine-tune",
     )
-    parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
+    parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
     parser.add_argument(
         "--iters", type=int, default=1000, help="Iterations to train for."
     )
     parser.add_argument(
-        "--val_batches",
+        "--val-batches",
         type=int,
         default=25,
         help="Number of validation batches, -1 uses the entire validation set.",
     )
     parser.add_argument(
-        "--learning_rate", type=float, default=1e-5, help="Adam learning rate."
+        "--learning-rate", type=float, default=1e-5, help="Adam learning rate."
     )
     parser.add_argument(
-        "--steps_per_report",
+        "--steps-per-report",
         type=int,
         default=10,
         help="Number of training steps between loss reporting.",
     )
     parser.add_argument(
-        "--steps_per_eval",
+        "--steps-per-eval",
         type=int,
         default=200,
         help="Number of training steps between validations.",
     )
     parser.add_argument(
-        "--resume_adapter_file",
+        "--resume-adapter-file",
         type=str,
         default=None,
         help="Load path to resume training with the given adapter weights.",
     )
     parser.add_argument(
-        "--adapter_file",
+        "--adapter-file",
         type=str,
         default="adapters.npz",
         help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
         help="Evaluate on the test set after training",
     )
     parser.add_argument(
-        "--test_batches",
+        "--test-batches",
         type=int,
         default=500,
         help="Number of test set batches, -1 uses the entire test set.",