more nits

Awni Hannun 2023-12-15 10:06:14 -08:00
parent fa51553f09
commit d108c558fc
3 changed files with 27 additions and 25 deletions

@@ -42,19 +42,21 @@ from Meta.
Convert the model with:
```
-python convert.py <path_to_torch_model> <path_to_mlx_model>
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
```
## Run
-#### Fine-tune
The main script is `lora.py`. To see a full list of options run
```
python lora.py --help
```
+### Fine-tune
To fine-tune a model use:
```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
`params.json` configuration which will all be output by the `convert.py` script.
By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
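For illustration only (not part of this diff), resuming a run with the renamed flag might look like this, using the default `adapters.npz` save path mentioned above:

```
python lora.py --model <path_to_model> \
    --train \
    --resume-adapter-file adapters.npz
```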
-#### Evaluate
+### Evaluate
To compute test set perplexity use
```
python lora.py --model <path_to_model> \
-    --adapter_file <path_to_adapters.npz> \
+    --adapter-file <path_to_adapters.npz> \
    --test
```
-#### Generate
+### Generate
For generation use
```
python lora.py --model <path_to_model> \
-    --adapter_file <path_to_adapters.npz> \
+    --adapter-file <path_to_adapters.npz> \
    --num-tokens 50 \
    --prompt "table: 1-10015132-16
columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify the
dataset with `--data=<my_data_directory>`. Check the subdirectory `data/` to
see the expected format.
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
```
{"text": "This is an example for the model."}

@@ -14,13 +14,13 @@ if __name__ == "__main__":
description="Convert Mistral or Llama models to MLX.",
)
parser.add_argument(
"--torch_model",
"--torch-model",
type=str,
default="mistral-7B-v0.1/",
help="The torch model directory",
)
parser.add_argument(
"--mlx_model",
"--mlx-model",
type=str,
default="mlx-mistral-7B-v0.1/",
help="The directory to store the mlx model",

@@ -58,44 +58,44 @@ def build_parser():
help="Directory with {train, valid, test}.jsonl files",
)
parser.add_argument(
"--lora_layers",
"--lora-layers",
type=int,
default=16,
help="Number of layers to fine-tune",
)
parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
parser.add_argument(
"--iters", type=int, default=1000, help="Iterations to train for."
)
parser.add_argument(
"--val_batches",
"--val-batches",
type=int,
default=25,
help="Number of validation batches, -1 uses the entire validation set.",
)
parser.add_argument(
"--learning_rate", type=float, default=1e-5, help="Adam learning rate."
"--learning-rate", type=float, default=1e-5, help="Adam learning rate."
)
parser.add_argument(
"--steps_per_report",
"--steps-per-report",
type=int,
default=10,
help="Number of training steps between loss reporting.",
)
parser.add_argument(
"--steps_per_eval",
"--steps-per-eval",
type=int,
default=200,
help="Number of training steps between validations.",
)
parser.add_argument(
"--resume_adapter_file",
"--resume-adapter-file",
type=str,
default=None,
help="Load path to resume training with the given adapter weights.",
)
parser.add_argument(
"--adapter_file",
"--adapter-file",
type=str,
default="adapters.npz",
help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
help="Evaluate on the test set after training",
)
parser.add_argument(
"--test_batches",
"--test-batches",
type=int,
default=500,
help="Number of test set batches, -1 uses the entire test set.",