mirror of
https://github.com/ml-explore/mlx-examples.git
Create executables for generate, lora, server, merge, convert (#682)
* feat: create executables mlx_lm.<cmd>

* nits in docs

---------

Co-authored-by: Awni Hannun <awni@apple.com>
parent 7d7e236061
commit 35206806ac
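The mechanism behind this change is setuptools' `console_scripts` entry points: each command name is mapped to a zero-argument `main()` function in a module, and installing the package generates a matching executable on `PATH`. The sketch below illustrates the pattern with hypothetical package, module, and command names (`example-pkg`, `example_pkg.cli`, `example-cmd`); the actual entry points this commit adds are in the `setup.py` hunk at the end of the diff.

```python
# Minimal sketch of a console_scripts entry point (hypothetical names, not
# the repository's setup.py -- see the final hunk for the real mapping).
from setuptools import setup

setup(
    name="example-pkg",
    version="0.1.0",
    packages=["example_pkg"],
    entry_points={
        "console_scripts": [
            # "<command> = <module>:<function>": installing the package
            # creates an `example-cmd` executable that imports
            # example_pkg.cli and calls its main() with no arguments.
            "example-cmd = example_pkg.cli:main",
        ]
    },
)
```

```python
# example_pkg/cli.py (hypothetical module): the entry-point target takes no
# parameters, so argument parsing moves inside main() -- the same refactor
# this commit applies to convert.py, generate.py, lora.py, merge.py, and
# server.py below.
import argparse


def main():
    parser = argparse.ArgumentParser(description="Example command.")
    parser.add_argument("--name", default="world")
    args = parser.parse_args()
    print(f"hello, {args.name}")


if __name__ == "__main__":  # running via `python -m example_pkg.cli` still works
    main()
```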
@@ -66,7 +66,7 @@ To see a description of all the arguments you can do:
 
 You can also use `mlx-lm` from the command line with:
 
 ```
-python -m mlx_lm.generate --model mistralai/Mistral-7B-Instruct-v0.1 --prompt "hello"
+mlx_lm.generate --model mistralai/Mistral-7B-Instruct-v0.1 --prompt "hello"
 ```
 
 This will download a Mistral 7B model from the Hugging Face Hub and generate
@@ -75,19 +75,19 @@ text using the given prompt.
 For a full list of options run:
 
 ```
-python -m mlx_lm.generate --help
+mlx_lm.generate --help
 ```
 
 To quantize a model from the command line run:
 
 ```
-python -m mlx_lm.convert --hf-path mistralai/Mistral-7B-Instruct-v0.1 -q
+mlx_lm.convert --hf-path mistralai/Mistral-7B-Instruct-v0.1 -q
 ```
 
 For more options run:
 
 ```
-python -m mlx_lm.convert --help
+mlx_lm.convert --help
 ```
 
 You can upload new models to Hugging Face by specifying `--upload-repo` to
@@ -95,7 +95,7 @@ You can upload new models to Hugging Face by specifying `--upload-repo` to
 [MLX Hugging Face community](https://huggingface.co/mlx-community) you can do:
 
 ```
-python -m mlx_lm.convert \
+mlx_lm.convert \
     --hf-path mistralai/Mistral-7B-v0.1 \
     -q \
     --upload-repo mlx-community/my-4bit-mistral
@@ -27,7 +27,7 @@ LoRA (QLoRA).[^qlora] LoRA fine-tuning works with the following model families:
 The main command is `mlx_lm.lora`. To see a full list of command-line options run:
 
 ```shell
-python -m mlx_lm.lora --help
+mlx_lm.lora --help
 ```
 
 Note, in the following the `--model` argument can be any compatible Hugging
@@ -37,7 +37,7 @@ You can also specify a YAML config with `-c`/`--config`. For more on the format
 [example YAML](examples/lora_config.yaml). For example:
 
 ```shell
-python -m mlx_lm.lora --config /path/to/config.yaml
+mlx_lm.lora --config /path/to/config.yaml
 ```
 
 If command-line flags are also used, they will override the corresponding
@@ -48,7 +48,7 @@ values in the config.
 To fine-tune a model use:
 
 ```shell
-python -m mlx_lm.lora \
+mlx_lm.lora \
     --model <path_to_model> \
     --train \
     --data <path_to_data> \
@@ -76,7 +76,7 @@ You can resume fine-tuning with an existing adapter with
 To compute test set perplexity use:
 
 ```shell
-python -m mlx_lm.lora \
+mlx_lm.lora \
     --model <path_to_model> \
     --adapter-path <path_to_adapters> \
     --data <path_to_data> \
@@ -88,7 +88,7 @@ python -m mlx_lm.lora \
 For generation use `mlx_lm.generate`:
 
 ```shell
-python -m mlx_lm.generate \
+mlx_lm.generate \
     --model <path_to_model> \
     --adapter-path <path_to_adapters> \
     --prompt "<your_model_prompt>"
@@ -106,13 +106,13 @@ You can generate a model fused with the low-rank adapters using the
 To see supported options run:
 
 ```shell
-python -m mlx_lm.fuse --help
+mlx_lm.fuse --help
 ```
 
 To generate the fused model run:
 
 ```shell
-python -m mlx_lm.fuse --model <path_to_model>
+mlx_lm.fuse --model <path_to_model>
 ```
 
 This will by default load the adapters from `adapters/`, and save the fused
@@ -125,7 +125,7 @@ useful for the sake of attribution and model versioning.
 For example, to fuse and upload a model derived from Mistral-7B-v0.1, run:
 
 ```shell
-python -m mlx_lm.fuse \
+mlx_lm.fuse \
     --model mistralai/Mistral-7B-v0.1 \
     --upload-repo mlx-community/my-4bit-lora-mistral \
     --hf-path mistralai/Mistral-7B-v0.1
@@ -134,7 +134,7 @@ python -m mlx_lm.fuse \
 To export a fused model to GGUF, run:
 
 ```shell
-python -m mlx_lm.fuse \
+mlx_lm.fuse \
     --model mistralai/Mistral-7B-v0.1 \
     --export-gguf
 ```
@@ -6,14 +6,14 @@ Face hub or save them locally for LoRA fine tuning.
 The main command is `mlx_lm.merge`:
 
 ```shell
-python -m mlx_lm.merge --config config.yaml
+mlx_lm.merge --config config.yaml
 ```
 
 The merged model will be saved by default in `mlx_merged_model`. To see a
 full list of options run:
 
 ```shell
-python -m mlx_lm.merge --help
+mlx_lm.merge --help
 ```
 
 Here is an example `config.yaml`:
@@ -11,13 +11,13 @@ API](https://platform.openai.com/docs/api-reference).
 Start the server with:
 
 ```shell
-python -m mlx_lm.server --model <path_to_model_or_hf_repo>
+mlx_lm.server --model <path_to_model_or_hf_repo>
 ```
 
 For example:
 
 ```shell
-python -m mlx_lm.server --model mistralai/Mistral-7B-Instruct-v0.1
+mlx_lm.server --model mistralai/Mistral-7B-Instruct-v0.1
 ```
 
 This will start a text generation server on port `8080` of the `localhost`
@@ -27,7 +27,7 @@ Hugging Face repo if it is not already in the local cache.
 To see a full list of options run:
 
 ```shell
-python -m mlx_lm.server --help
+mlx_lm.server --help
 ```
 
 You can make a request to the model by running:
@@ -52,7 +52,11 @@ def configure_parser() -> argparse.ArgumentParser:
     return parser
 
 
-if __name__ == "__main__":
+def main():
     parser = configure_parser()
     args = parser.parse_args()
     convert(**vars(args))
+
+
+if __name__ == "__main__":
+    main()
@@ -101,7 +101,10 @@ def colorprint_by_t0(s, t0):
     colorprint(color, s)
 
 
-def main(args):
+def main():
+    parser = setup_arg_parser()
+    args = parser.parse_args()
+
     mx.random.seed(args.seed)
 
     # Building tokenizer_config
@@ -143,6 +146,4 @@ def main(args):
 
 
 if __name__ == "__main__":
-    parser = setup_arg_parser()
-    args = parser.parse_args()
-    main(args)
+    main()
@@ -247,7 +247,7 @@ def run(args, training_callback: TrainingCallback = None):
         print(f"Test loss {test_loss:.3f}, Test ppl {test_ppl:.3f}.")
 
 
-if __name__ == "__main__":
+def main():
     parser = build_parser()
     args = parser.parse_args()
     config = args.config
@@ -266,3 +266,7 @@ if __name__ == "__main__":
             if not args.get(k, None):
                 args[k] = v
     run(types.SimpleNamespace(**args))
+
+
+if __name__ == "__main__":
+    main()
@@ -162,7 +162,11 @@ def merge(
         upload_to_hub(mlx_path, upload_repo, base_hf_path)
 
 
-if __name__ == "__main__":
+def main():
     parser = configure_parser()
     args = parser.parse_args()
     merge(**vars(args))
+
+
+if __name__ == "__main__":
+    main()
@@ -409,7 +409,7 @@ def run(host: str, port: int, server_class=HTTPServer, handler_class=APIHandler)
     httpd.serve_forever()
 
 
-if __name__ == "__main__":
+def main():
     parser = argparse.ArgumentParser(description="MLX Http Server.")
     parser.add_argument(
         "--model",
@@ -449,3 +449,7 @@ if __name__ == "__main__":
     )
 
     run(args.host, args.port)
+
+
+if __name__ == "__main__":
+    main()
@@ -24,4 +24,14 @@ setup(
     install_requires=requirements,
     packages=["mlx_lm", "mlx_lm.models", "mlx_lm.tuner"],
     python_requires=">=3.8",
+    entry_points={
+        "console_scripts": [
+            "mlx_lm.convert = mlx_lm.convert:main",
+            "mlx_lm.fuse = mlx_lm.fuse:main",
+            "mlx_lm.generate = mlx_lm.generate:main",
+            "mlx_lm.lora = mlx_lm.lora:main",
+            "mlx_lm.merge = mlx_lm.merge:main",
+            "mlx_lm.server = mlx_lm.server:main",
+        ]
+    },
 )
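With these entry points in place, installing the `mlx-lm` package (for example, `pip install -e .` from the package directory) puts `mlx_lm.convert`, `mlx_lm.fuse`, `mlx_lm.generate`, `mlx_lm.lora`, `mlx_lm.merge`, and `mlx_lm.server` on `PATH`, which is why the documentation hunks above drop the `python -m` prefix. The `python -m mlx_lm.<cmd>` form continues to work.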