Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-09-01 12:49:50 +08:00)
LoRA on all linear transformer block layers (#546)
* Add --lora-all-linear option to apply LoRA to all linear transformer block layers
* Moved to YAML config and added specification of rank & alpha
* nits in config, more tests
* nit
* run tests for PRs

Co-authored-by: Awni Hannun <awni@apple.com>
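For context before the diff: the new `config` argument carries the LoRA hyperparameters that were previously hard-coded. Below is a minimal sketch of the dictionary shape the updated function reads, inferred from the keys accessed in the diff (`rank`, `alpha`, `scale`, and optional `keys`); the concrete values are illustrative, not repository defaults. In the YAML training config, the diff's own comment suggests this block is named `lora_parameters`.

```python
# Sketch of the config consumed by the new linear_to_lora_layers signature.
# The values here are illustrative; only the key names come from the diff below.
lora_config = {
    "rank": 8,       # LoRA rank r passed to LoRALinear.from_linear
    "alpha": 16.0,   # LoRA alpha
    "scale": 10.0,   # extra scaling applied to the low-rank update
    # Optional: explicit module keys. When omitted, per-model defaults are
    # chosen from model_type (e.g. self_attn.q_proj / self_attn.v_proj).
    "keys": ["self_attn.q_proj", "self_attn.v_proj"],
}
```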
@@ -1,4 +1,5 @@
 import os
+from typing import Dict

 import mlx.core as mx
 import mlx.nn as nn
@@ -7,7 +8,11 @@ from mlx.utils import tree_unflatten
 from .lora import LoRALinear


-def linear_to_lora_layers(model: nn.Module, num_lora_layers: int):
+def linear_to_lora_layers(
+    model: nn.Module,
+    num_lora_layers: int,
+    config: Dict,
+):
     """
     Convert some of the models linear layers to lora layers.

@@ -15,16 +20,28 @@ def linear_to_lora_layers(model: nn.Module, num_lora_layers: int):
         model (nn.Module): The neural network model.
         num_lora_layers (int): The number of blocks to convert to lora layers
             starting from the last layer.
+        config (dict): More configuration parameters for LoRA, including the
+            rank, alpha, scale, and optional layer keys.
     """

-    def check_lora_layers(num_model):
-        if num_lora_layers > num_model:
-            raise ValueError(
-                f"Requested {num_lora_layers} LoRA layers "
-                f"but the model only has {num_model} layers."
-            )
+    num_layers = len(model.layers)
+    if num_lora_layers > num_layers:
+        raise ValueError(
+            f"Requested {num_lora_layers} LoRA layers "
+            f"but the model only has {num_layers} layers."
+        )

-    if model.model_type in [
+    to_lora = lambda lin: LoRALinear.from_linear(
+        lin, r=config["rank"], alpha=config["alpha"], scale=config["scale"]
+    )
+
+    # If the lora_parameters are set, we assume the keys
+    # are correct for the given model
+    keys = config.get("keys", None)
+    if keys is not None:
+        keys = set(keys)
+    elif model.model_type in [
         "mistral",
         "llama",
         "phi",
@@ -34,32 +51,21 @@ def linear_to_lora_layers(model: nn.Module, num_lora_layers: int):
         "gemma",
         "starcoder2",
     ]:
-        check_lora_layers(len(model.model.layers))
-
-        for l in model.model.layers[len(model.model.layers) - num_lora_layers :]:
-            l.self_attn.q_proj = LoRALinear.from_linear(l.self_attn.q_proj)
-            l.self_attn.v_proj = LoRALinear.from_linear(l.self_attn.v_proj)
-            if hasattr(l, "block_sparse_moe"):
-                l.block_sparse_moe.gate = LoRALinear.from_linear(
-                    l.block_sparse_moe.gate
-                )
+        keys = set(["self_attn.q_proj", "self_attn.v_proj"])
+        if model.model_type == "mixtral":
+            keys.add("block_sparse_moe.gate")
     elif model.model_type == "olmo":
-        check_lora_layers(len(model.model.transformer.blocks))
-
-        for l in model.model.transformer.blocks[
-            len(model.model.transformer.blocks) - num_lora_layers :
-        ]:
-            l.att_proj = LoRALinear.from_linear(l.att_proj)
+        keys = set(["att_proj"])
     elif model.model_type == "phi-msft":
-        check_lora_layers(len(model.transformer.h))
-
-        for l in model.transformer.h[len(model.transformer.h) - num_lora_layers :]:
-            l.mixer.Wqkv = LoRALinear.from_linear(l.mixer.Wqkv)
-            l.moe.gate = LoRALinear.from_linear(l.moe.gate)
-
+        keys = set(["mixer.Wqkv", "moe.gate"])
     else:
         raise ValueError(f"Lora does not support {model.model_type}")

+    for l in model.layers[num_layers - num_lora_layers :]:
+        modules = l.named_modules()
+        lora_layers = [(k, to_lora(m)) for k, m in modules if k in keys]
+        l.update_modules(tree_unflatten(lora_layers))


 def apply_lora_layers(model: nn.Module, adapter_file: str) -> nn.Module:
     """
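The final loop leans on `mlx.utils.tree_unflatten`: `named_modules()` yields flat dotted paths, and `tree_unflatten` rebuilds the nesting that `update_modules` expects. A minimal standalone sketch of that mechanism, with placeholder strings standing in for `LoRALinear` instances:

```python
from mlx.utils import tree_unflatten

# Flat (dotted-name, value) pairs, as produced by filtering named_modules().
flat = [("self_attn.q_proj", "lora_q"), ("self_attn.v_proj", "lora_v")]

print(tree_unflatten(flat))
# {'self_attn': {'q_proj': 'lora_q', 'v_proj': 'lora_v'}}
```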
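An end-to-end usage sketch under stated assumptions: the import paths follow this file's location under `mlx_lm/tuner/`, the model name is illustrative, and freezing the base model first is the usual LoRA recipe rather than something this diff enforces:

```python
from mlx_lm import load  # assumed package entry point at the time of this commit
from mlx_lm.tuner.utils import linear_to_lora_layers  # path assumed from this diff

model, tokenizer = load("mistralai/Mistral-7B-v0.1")  # illustrative model choice
model.freeze()  # train only the LoRA parameters added below

linear_to_lora_layers(
    model,
    num_lora_layers=8,  # convert the last 8 transformer blocks
    config={"rank": 8, "alpha": 16.0, "scale": 10.0},  # illustrative values
)
```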