Mirror of https://github.com/ml-explore/mlx-examples.git, synced 2025-06-24 17:31:18 +08:00
commit d108c558fc (parent fa51553f09)

    more nits
lora/README.md

@@ -42,19 +42,21 @@ from Meta.
 Convert the model with:
 
 ```
-python convert.py <path_to_torch_model> <path_to_mlx_model>
+python convert.py \
+    --torch-model <path_to_torch_model> \
+    --mlx-model <path_to_mlx_model>
 ```
 
 ## Run
 
-#### Fine-tune
-
 The main script is `lora.py`. To see a full list of options run
 
 ```
 python lora.py --help
 ```
 
+### Fine-tune
+
 To fine-tune a model use:
 
 ```
@@ -67,28 +69,28 @@ Note, the model path should have the MLX weights, the tokenizer, and the
 `params.json` configuration which will all be output by the `convert.py` script.
 
 By default, the adapter weights are saved in `adapters.npz`. You can specify
-the output location with `--adapter_file`.
+the output location with `--adapter-file`.
 
-You can resume fine-tuning with an existing adapter with
-`--resume_adapter_file` to specify the location of the adapter weights.
+You can resume fine-tuning with an existing adapter with `--resume-adapter-file
+<path_to_adapters.npz>`.
 
-#### Evaluate
+### Evaluate
 
 To compute test set perplexity use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --test
 ```
 
-#### Generate
+### Generate
 
 For generation use
 
 ```
 python lora.py --model <path_to_model> \
-               --adapter_file <path_to_adapters.npz> \
+               --adapter-file <path_to_adapters.npz> \
                --num-tokens 50 \
                --prompt "table: 1-10015132-16
 columns: Player, No., Nationality, Position, Years in Toronto, School/Club Team
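A note on the Evaluate command above: test set perplexity is conventionally the exponential of the average per-token cross-entropy loss. A minimal standalone sketch of that relation, using made-up loss values and not tied to `lora.py` internals:

```
import math

# Hypothetical average cross-entropy losses (nats per token) for a few test batches.
batch_losses = [2.31, 2.45, 2.28, 2.40]

avg_loss = sum(batch_losses) / len(batch_losses)
perplexity = math.exp(avg_loss)  # ppl = exp(mean negative log-likelihood)
print(f"test loss {avg_loss:.3f}, test ppl {perplexity:.3f}")
```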
@@ -119,10 +121,10 @@ You can make your own dataset for fine-tuning with LoRA. You can specify the
 dataset with `--data=<my_data_directory>`. Check the subdirectory `data/` to
 see the expected format.
 
-For fine-tuning, the data loader expects a `train.jsonl` and a `valid.jsonl` to
-be in the data directory. For evaluation (`--test`), the data loader expects a
-`test.jsonl` in the directory. Each line in the `*.jsonl` file should look
-like: are:
+For fine-tuning (`--train`), the data loader expects a `train.jsonl` and a
+`valid.jsonl` to be in the data directory. For evaluation (`--test`), the data
+loader expects a `test.jsonl` in the data directory. Each line in the `*.jsonl`
+file should look like:
 
 ```
 {"text": "This is an example for the model."}
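As a quick illustration of the dataset layout described above, here is a minimal sketch that writes a tiny dataset in that format; the directory name and example texts are placeholders:

```
import json
from pathlib import Path

# Hypothetical data directory; point lora.py at it with --data=<my_data_directory>.
data_dir = Path("my_data")
data_dir.mkdir(exist_ok=True)

splits = {
    "train.jsonl": ["This is an example for the model.", "Another training example."],
    "valid.jsonl": ["A validation example."],
    "test.jsonl": ["A held-out test example."],
}

# One JSON object per line, each with a single "text" field.
for name, texts in splits.items():
    with open(data_dir / name, "w") as f:
        for text in texts:
            f.write(json.dumps({"text": text}) + "\n")
```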
lora/convert.py

@@ -14,13 +14,13 @@ if __name__ == "__main__":
         description="Convert Mistral or Llama models to MLX.",
     )
     parser.add_argument(
-        "--torch_model",
+        "--torch-model",
         type=str,
         default="mistral-7B-v0.1/",
         help="The torch model directory",
     )
     parser.add_argument(
-        "--mlx_model",
+        "--mlx-model",
         type=str,
         default="mlx-mistral-7B-v0.1/",
         help="The directory to store the mlx model",
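A note on these renames, which applies equally to the `lora.py` flags below: by standard `argparse` behavior, hyphens in a long option are turned into underscores when building the attribute name, so the Python code can keep reading `args.torch_model`, `args.adapter_file`, etc. unchanged. The old underscore spellings are, however, no longer accepted on the command line. A minimal sketch:

```
import argparse

parser = argparse.ArgumentParser()
# Hyphenated flag, as in the renamed arguments above.
parser.add_argument("--adapter-file", type=str, default="adapters.npz")

# argparse converts '-' to '_' for the dest, so the attribute keeps its old name.
args = parser.parse_args(["--adapter-file", "my_adapters.npz"])
print(args.adapter_file)  # my_adapters.npz

# The old spelling is not an alias for the new flag; it is left unrecognized.
_, unknown = parser.parse_known_args(["--adapter_file=my_adapters.npz"])
print(unknown)  # ['--adapter_file=my_adapters.npz']
```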
lora/lora.py (18 changed lines)

@@ -58,44 +58,44 @@ def build_parser():
         help="Directory with {train, valid, test}.jsonl files",
     )
     parser.add_argument(
-        "--lora_layers",
+        "--lora-layers",
         type=int,
         default=16,
         help="Number of layers to fine-tune",
     )
-    parser.add_argument("--batch_size", type=int, default=4, help="Minibatch size.")
+    parser.add_argument("--batch-size", type=int, default=4, help="Minibatch size.")
     parser.add_argument(
         "--iters", type=int, default=1000, help="Iterations to train for."
     )
     parser.add_argument(
-        "--val_batches",
+        "--val-batches",
         type=int,
         default=25,
         help="Number of validation batches, -1 uses the entire validation set.",
     )
     parser.add_argument(
-        "--learning_rate", type=float, default=1e-5, help="Adam learning rate."
+        "--learning-rate", type=float, default=1e-5, help="Adam learning rate."
     )
     parser.add_argument(
-        "--steps_per_report",
+        "--steps-per-report",
         type=int,
         default=10,
         help="Number of training steps between loss reporting.",
     )
     parser.add_argument(
-        "--steps_per_eval",
+        "--steps-per-eval",
         type=int,
         default=200,
         help="Number of training steps between validations.",
     )
     parser.add_argument(
-        "--resume_adapter_file",
+        "--resume-adapter-file",
         type=str,
         default=None,
         help="Load path to resume training with the given adapter weights.",
     )
     parser.add_argument(
-        "--adapter_file",
+        "--adapter-file",
         type=str,
         default="adapters.npz",
         help="Save/load path for the trained adapter weights.",
@@ -106,7 +106,7 @@ def build_parser():
         help="Evaluate on the test set after training",
     )
     parser.add_argument(
-        "--test_batches",
+        "--test-batches",
         type=int,
         default=500,
         help="Number of test set batches, -1 uses the entire test set.",