Support DoRA fine-tuning in mlx-examples/llms/mlx_lm (#779)

* support DoRA fine-tuning

* fix issues in lora.py and tuner/utils.py

* add a use_dora (bool) argument to the adapter-loading functions

* remove unsupported quantization code and fix the calculation errors in mlx_lm/tuner/dora.py

* use stop_gradient to prevent gradients from flowing through 'norm' during backpropagation (see the sketch after the commit metadata below)

* set DEFAULT_USE_DORA in mlx_lm/generate.py

* add annotations for every use_dora argument

* support fusing DoRA layers in mlx_lm/fuse.py and fix a bug in to_linear() in mlx_lm/tuner/dora.py

* simplify the code that determines the type of a fused layer in mlx_lm/fuse.py

* handle use_dora in mlx_lm/fuse.py when calling apply_lora_layers()

* style + nits

* style + nits

* more updates

---------

Co-authored-by: chenyifei08 <chenyifei08@baidu.com>
Co-authored-by: Awni Hannun <awni@apple.com>
Author: alexC-nonsense4k
Date: 2024-05-16 23:21:26 +08:00 (committed by GitHub)
Parent: 69181e0058
Commit: 42458914c8
7 changed files with 147 additions and 19 deletions
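The bullets above mention two DoRA-specific details: the weight-norm term is wrapped in stop_gradient so it receives no gradients, and fuse.py can merge DoRA layers back into plain linear layers. The code below is a minimal illustrative sketch of both ideas, not the code from this commit; the parameter names (weight, lora_a, lora_b, m) and the default scale are assumptions.

import mlx.core as mx


def dora_forward(x, weight, lora_a, lora_b, m, scale=2.0):
    # weight: (out_dims, in_dims) frozen base weight
    # lora_a: (in_dims, r), lora_b: (r, out_dims) trainable low-rank factors
    # m: (out_dims,) trainable magnitude vector
    adapted = weight + scale * (lora_a @ lora_b).T  # V = W + scale * (A B)^T

    # Per-row norm of the adapted weight; stop_gradient keeps this
    # normalization constant out of backpropagation, so gradients flow only
    # through m and the low-rank factors.
    norm = mx.stop_gradient(mx.linalg.norm(adapted, axis=1))

    y = x @ adapted.T      # x V^T
    return (m / norm) * y  # rescale each output channel to magnitude m


def fuse_dora(weight, lora_a, lora_b, m, scale=2.0):
    # Merge a DoRA layer into a single dense weight: W' = diag(m / ||V||_row) V.
    adapted = weight + scale * (lora_a @ lora_b).T
    norm = mx.linalg.norm(adapted, axis=1)
    return mx.expand_dims(m / norm, 1) * adapted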

@@ -9,6 +9,7 @@ import mlx.nn as nn
 import mlx.optimizers as opt
 from mlx.utils import tree_unflatten
+from .dora import DoRALinear
 from .lora import LoRALinear
@@ -36,6 +37,7 @@ def linear_to_lora_layers(
     model: nn.Module,
     num_lora_layers: int,
     config: Dict,
+    use_dora: bool = False,
 ):
     """
     Convert some of the models linear layers to lora layers.
@@ -46,6 +48,8 @@ def linear_to_lora_layers(
         starting from the last layer.
         config (dict): More configuration parameters for LoRA, including the
           rank, alpha, scale, and optional layer keys.
+        use_dora (bool): If True, uses DoRA instead of LoRA.
+          Default: ``False``
     """
     num_layers = len(model.layers)
@@ -54,14 +58,16 @@ def linear_to_lora_layers(
             f"Requested {num_lora_layers} LoRA layers "
             f"but the model only has {num_layers} layers."
         )

+    cls = DoRALinear if use_dora else LoRALinear
-    to_lora = lambda lin: LoRALinear.from_linear(
-        lin,
-        r=config["rank"],
-        alpha=config["alpha"],
-        scale=config["scale"],
-        dropout=config["dropout"],
-    )
+    def to_lora(lin):
+        return cls.from_linear(
+            lin,
+            r=config["rank"],
+            alpha=config["alpha"],
+            scale=config["scale"],
+            dropout=config["dropout"],
+        )

     keys = config.get("keys", None)
     if keys is not None:
@@ -119,7 +125,12 @@ def apply_lora_layers(model: nn.Module, adapter_path: str) -> nn.Module:
         raise FileNotFoundError(f"The adapter path does not exist: {adapter_path}")
     with open(adapter_path / "adapter_config.json", "r") as fid:
         config = types.SimpleNamespace(**json.load(fid))
-    linear_to_lora_layers(model, config.lora_layers, config.lora_parameters)
+    linear_to_lora_layers(
+        model,
+        config.lora_layers,
+        config.lora_parameters,
+        getattr(config, "use_dora", False),
+    )
     model.load_weights(str(adapter_path / "adapters.safetensors"), strict=False)
     return model
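
A rough usage sketch of the new flag (not part of the diff above): the model path, the config values, and the import path are assumptions — the helpers are taken to live in mlx_lm.tuner.utils, as the commit message's reference to tuner/utils.py suggests.

from mlx_lm import load
from mlx_lm.tuner.utils import linear_to_lora_layers

model, tokenizer = load("path/to/model")  # placeholder model path or HF repo

lora_parameters = {
    "rank": 8,
    "alpha": 16,
    "scale": 2.0,
    "dropout": 0.0,
    # "keys": [...],  # optional: restrict which layers are converted
}

# With use_dora=True the converted layers become DoRALinear instead of
# LoRALinear (cls = DoRALinear if use_dora else LoRALinear in the diff above).
linear_to_lora_layers(model, 8, lora_parameters, use_dora=True)

When adapters are loaded later through apply_lora_layers, the same choice is recovered from adapter_config.json via getattr(config, "use_dora", False), so an adapter saved with "use_dora": true is reattached as DoRA layers.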