Move lora example to use the same model format / conversion as hf_llm (#252)

* Move the lora example to the Hugging Face format to allow more models

* fixes

* comments

* more readme nits

* fusion + works better for QLoRA

* nits

* comments
Authored by Awni Hannun on 2024-01-09 11:14:52 -08:00, committed by GitHub
parent bbd7172eef
commit 7b258f33ac
10 changed files with 521 additions and 224 deletions


@@ -60,7 +60,7 @@ You can convert (change the data type or quantize) models using the
 `convert.py` script. This script takes a Hugging Face repo as input and outputs
 a model directory (which you can optionally also upload to Hugging Face).

-For example, to make 4-bit quantized a model, run:
+For example, to make a 4-bit quantized model, run:

 ```
 python convert.py --hf-path <hf_repo> -q
@@ -73,5 +73,5 @@ python convert.py --help
 ```

 You can upload new models to the [Hugging Face MLX
-Community](https://huggingface.co/mlx-community) by specifying `--upload-name``
+Community](https://huggingface.co/mlx-community) by specifying `--upload-name`
 to `convert.py`.
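
Read together, these README hunks document a flow along the following lines. The model and upload names below are hypothetical placeholders; only the `--hf-path`, `-q`, and `--upload-name` flags come from the text above:

```
# Hypothetical end-to-end conversion: quantize to 4 bits and upload.
python convert.py --hf-path mistralai/Mistral-7B-v0.1 -q \
    --upload-name mlx-community/Mistral-7B-v0.1-4bit
```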


@@ -39,7 +39,6 @@ def generate(
             tic = time.time()

         tokens.append(token.item())
-        # if (n + 1) % 10 == 0:
         s = tokenizer.decode(tokens)
         print(s[skip:], end="", flush=True)
         skip = len(s)
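
The removed line was a leftover commented-out counter check in the streaming output loop. As a self-contained sketch of the incremental-decode pattern the surrounding context lines implement (the function name and arguments here are stand-ins, not the example's actual API):

```python
def stream_decode(token_stream, tokenizer):
    """Print generated text incrementally as token ids arrive.

    Re-decoding the full token list each step and printing only the
    not-yet-printed suffix avoids emitting broken partial characters
    that per-token decoding can produce.
    """
    tokens = []
    skip = 0  # number of characters already printed
    for token in token_stream:
        tokens.append(token)
        s = tokenizer.decode(tokens)
        print(s[skip:], end="", flush=True)
        skip = len(s)
    print()
```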


@@ -10,7 +10,6 @@ from typing import Dict, Optional, Tuple, Union

 import mlx.core as mx
 import mlx.nn as nn
 from huggingface_hub import snapshot_download
-from mlx.utils import tree_unflatten
 from transformers import AutoTokenizer
@@ -250,9 +249,7 @@ def load(path_or_hf_repo: str):
     model.load_weights(list(weights.items()))

     mx.eval(model.parameters())

-    tokenizer = AutoTokenizer.from_pretrained(
-        model_path,
-    )
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
     return model, tokenizer
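
Pieced together from the context lines, the simplified load path is roughly the sketch below. The weights file name, the `snapshot_download` call, and passing in a pre-built `model` are assumptions made to keep the sketch self-contained; the real `load` builds the model itself from its saved config:

```python
from pathlib import Path

import mlx.core as mx
import mlx.nn as nn
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer


def load(path_or_hf_repo: str, model: nn.Module):
    # Treat the argument as a local directory first; otherwise fetch the
    # repo from the Hugging Face Hub (assumed behavior, not shown above).
    model_path = Path(path_or_hf_repo)
    if not model_path.exists():
        model_path = Path(snapshot_download(repo_id=path_or_hf_repo))

    # "weights.npz" is an assumed file name for the converted weights.
    weights = mx.load(str(model_path / "weights.npz"))
    model.load_weights(list(weights.items()))
    mx.eval(model.parameters())  # force evaluation of the lazy parameters

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer
```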