Allow the entire model to be targeted for LoRA and DoRA fine-tuning: LoRA and DoRA embeddings with a small DoRALinear bug fix (#914)

* feature: LoRA adapter for Embeddings (a sketch of the adapter interface follows the list below)

* feature: wire LoRAEmbedding into the tuner; allow the embedding and non-model.layers Linear layers to be targeted for fine-tuning

* feature: DoRA adapter for Embeddings

* feature: wire in DoRAEmbedding

* bugfix: ensure self.m is recalculated when the linear layer is changed in DoRALinear.from_linear (illustrated in the second sketch below)

* refactor: prefer from_base over from_linear or from_embedding; prefer fuse over to_linear or to_embedding

* cleanup: remove unused imports in test_dora.py

* refactor: remove unnecessary non_layer_modules

* cleanup: remove incorrect comments for LoRA embedding dropout; remove unnecessary parentheses in DoRA embedding dropout

* nits
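
To make the embedding-adapter feature concrete, here is a minimal sketch of a LoRA adapter for an embedding layer, using the from_base/fuse naming this commit settles on. It is illustrative only, not the actual mlx_lm implementation: the class name LoRAEmbeddingSketch, the defaults for r and scale, and the initialization scheme are assumptions.

```python
# Illustrative sketch only -- not the mlx_lm implementation.
import math

import mlx.core as mx
import mlx.nn as nn


class LoRAEmbeddingSketch(nn.Module):
    """A low-rank adapter wrapped around a frozen nn.Embedding."""

    @classmethod
    def from_base(cls, base: nn.Embedding, r: int = 8, scale: float = 20.0):
        num_embeddings, dims = base.weight.shape
        lora = cls(num_embeddings, dims, r=r, scale=scale)
        lora.embedding = base  # keep the original (frozen) embedding
        return lora

    def __init__(self, num_embeddings: int, dims: int, r: int = 8, scale: float = 20.0):
        super().__init__()
        self.scale = scale
        self.embedding = nn.Embedding(num_embeddings, dims)
        # Low-rank factors: lora_a is indexed like the embedding table,
        # lora_b projects the rank-r rows up to the embedding dimension.
        bound = 1.0 / math.sqrt(num_embeddings)
        self.lora_a = mx.random.uniform(low=-bound, high=bound, shape=(num_embeddings, r))
        self.lora_b = mx.zeros((r, dims))

    def __call__(self, x):
        # Frozen lookup plus the scaled low-rank update for the same indices.
        return self.embedding(x) + self.scale * (self.lora_a[x] @ self.lora_b)

    def fuse(self) -> nn.Embedding:
        # Fold the low-rank update back into the weights and return a plain layer.
        num_embeddings, dims = self.embedding.weight.shape
        fused = nn.Embedding(num_embeddings, dims)
        fused.weight = self.embedding.weight + self.scale * (self.lora_a @ self.lora_b)
        return fused
```

In use, the tuner would swap the model's token embedding for from_base(embedding), train only lora_a and lora_b, and call fuse() afterwards to recover a plain nn.Embedding; the DoRA variant follows the same interface but adds a learned magnitude vector.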

---------

Co-authored-by: Awni Hannun <awni@apple.com>
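
The DoRALinear bug fix above is easiest to see in code. The following is a hedged illustration rather than the actual mlx_lm class; the name DoRALinearSketch and the constructor defaults are assumptions. The point is that DoRA's magnitude vector m is derived from the base weights, so it must be recomputed after the real base layer is swapped in by from_base (formerly from_linear), otherwise it still describes the randomly initialized placeholder.

```python
# Hedged illustration of the magnitude-recomputation fix -- not the mlx_lm code.
import mlx.core as mx
import mlx.nn as nn


class DoRALinearSketch(nn.Module):
    def __init__(self, input_dims: int, output_dims: int, r: int = 8):
        super().__init__()
        self.linear = nn.Linear(input_dims, output_dims, bias=False)
        self.lora_a = mx.random.normal((input_dims, r)) * input_dims**-0.5
        self.lora_b = mx.zeros((r, output_dims))
        # DoRA's magnitude vector: the per-output-row norm of the base weight.
        self.m = mx.sqrt(mx.sum(self.linear.weight**2, axis=1))

    @classmethod
    def from_base(cls, linear: nn.Linear, r: int = 8):
        output_dims, input_dims = linear.weight.shape
        dora = cls(input_dims, output_dims, r=r)
        dora.linear = linear
        # The fix: recompute m from the supplied base weights; without this line
        # m would still reflect the placeholder linear created in __init__.
        dora.m = mx.sqrt(mx.sum(linear.weight**2, axis=1))
        return dora

    # The DoRA forward pass (directional update scaled by m) is omitted here.
```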
Author: Zai Thottakath
Date: 2024-08-16 09:38:36 -05:00 (committed by GitHub)
Parent: c50971e860
Commit: 4e01700816
5 changed files with 306 additions and 21 deletions


@@ -6,8 +6,8 @@ from pathlib import Path
 from mlx.utils import tree_flatten, tree_unflatten
 from .gguf import convert_to_gguf
-from .tuner.dora import DoRALinear
-from .tuner.lora import LoRALinear, LoRASwitchLinear
+from .tuner.dora import DoRAEmbedding, DoRALinear
+from .tuner.lora import LoRAEmbedding, LoRALinear, LoRASwitchLinear
 from .tuner.utils import apply_lora_layers, dequantize
 from .utils import (
     fetch_from_hub,
@@ -80,9 +80,11 @@ def main() -> None:
     model = apply_lora_layers(model, args.adapter_path)
     fused_linears = [
-        (n, m.to_linear())
+        (n, m.fuse())
         for n, m in model.named_modules()
-        if isinstance(m, (LoRASwitchLinear, LoRALinear, DoRALinear))
+        if isinstance(
+            m, (LoRASwitchLinear, LoRALinear, LoRAEmbedding, DoRALinear, DoRAEmbedding)
+        )
     ]
     model.update_modules(tree_unflatten(fused_linears))
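
With this change the fuse step treats every adapter uniformly: LoRA and DoRA modules, linear or embedding, are all collapsed back into their plain counterparts through the same fuse() call before the model's modules are updated.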