Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-09-01 12:49:50 +08:00).
Move lora example to use the same model format / conversion as hf_llm (#252)

* huffing face the lora example to allow more models
* fixes
* comments
* more readme nits
* fusion + works better for qlora (sketched below)
* nits'
* comments
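The "fusion" bullet refers to merging trained LoRA adapters back into the base linear weights so the fused model runs without adapter layers. As a rough sketch of the underlying arithmetic (the names, shapes, and scaling convention below are assumptions, not this commit's exact code):

```python
import mlx.core as mx


def fuse_linear(weight: mx.array, lora_a: mx.array, lora_b: mx.array,
                scale: float) -> mx.array:
    """Merge a low-rank LoRA update into a base linear weight.

    Assumed shapes: weight (out, in), lora_b (out, r), lora_a (r, in),
    so the fused weight is W + scale * (B @ A).
    """
    return weight + scale * (lora_b @ lora_a)
```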
````diff
@@ -60,7 +60,7 @@ You can convert (change the data type or quantize) models using the
 `convert.py` script. This script takes a Hugging Face repo as input and outputs
 a model directory (which you can optionally also upload to Hugging Face).
 
-For example, to make 4-bit quantized a model, run:
+For example, to make a 4-bit quantized model, run:
 
 ```
 python convert.py --hf-path <hf_repo> -q
````
````diff
@@ -73,5 +73,5 @@ python convert.py --help
 ```
 
 You can upload new models to the [Hugging Face MLX
-Community](https://huggingface.co/mlx-community) by specifying `--upload-name``
+Community](https://huggingface.co/mlx-community) by specifying `--upload-name`
 to `convert.py`.
````
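For instance, a conversion that quantizes and uploads in one step might look like the following (the Hugging Face repo and upload name here are illustrative, not taken from this commit):

```
python convert.py --hf-path mistralai/Mistral-7B-v0.1 -q --upload-name mlx-community/Mistral-7B-v0.1-4bit
```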
```diff
@@ -39,7 +39,6 @@ def generate(
         tic = time.time()
 
         tokens.append(token.item())
-        # if (n + 1) % 10 == 0:
         s = tokenizer.decode(tokens)
         print(s[skip:], end="", flush=True)
         skip = len(s)
```
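The surviving lines implement incremental detokenization: each step re-decodes the full token list and prints only the suffix past `skip`, so text that spans multiple tokens still renders correctly. A standalone sketch of the same pattern (the `token_stream` and `tokenizer` arguments are stand-ins, not this file's exact signature):

```python
def stream_print(token_stream, tokenizer):
    """Print decoded text incrementally as token ids arrive.

    `token_stream` is any iterable of token ids; `tokenizer` is anything
    with a `decode(list[int]) -> str` method (e.g. a Hugging Face tokenizer).
    """
    tokens = []
    skip = 0  # how many characters have already been printed
    for token in token_stream:
        tokens.append(token)
        s = tokenizer.decode(tokens)  # re-decode everything seen so far
        print(s[skip:], end="", flush=True)  # emit only the new suffix
        skip = len(s)
    print()  # final newline
```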
```diff
@@ -10,7 +10,6 @@ from typing import Dict, Optional, Tuple, Union
 import mlx.core as mx
 import mlx.nn as nn
 from huggingface_hub import snapshot_download
-from mlx.utils import tree_unflatten
 from transformers import AutoTokenizer
```
```diff
@@ -250,9 +249,7 @@ def load(path_or_hf_repo: str):
     model.load_weights(list(weights.items()))
 
     mx.eval(model.parameters())
-    tokenizer = AutoTokenizer.from_pretrained(
-        model_path,
-    )
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
     return model, tokenizer
```
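For context, `load` here follows a common MLX loading pattern: resolve a local model directory (downloading from the Hub if needed), load the flat weights into the model, force evaluation, and build the tokenizer. A condensed, hypothetical version, with the weight-file name and model construction simplified relative to the real script:

```python
from pathlib import Path

import mlx.core as mx
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer


def load_sketch(path_or_hf_repo: str, build_model):
    """Simplified load(); `build_model` is a stand-in for the real
    config-driven model constructor."""
    # Use the path directly if it exists locally; otherwise download it.
    model_path = Path(path_or_hf_repo)
    if not model_path.exists():
        model_path = Path(snapshot_download(repo_id=path_or_hf_repo))

    # Assumed single weights.npz; the real script reads safetensors shards.
    weights = mx.load(str(model_path / "weights.npz"))
    model = build_model()
    model.load_weights(list(weights.items()))
    mx.eval(model.parameters())  # materialize the lazily-loaded arrays

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer
```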