From 40f9e83306a45dd2e35aa1ecffab082d9931d2dd Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Wed, 18 Sep 2024 13:24:39 +0200
Subject: [PATCH] Further update to tuner/utils for correct layer selection.
 Successful training

---
 llms/mlx_lm/tuner/utils.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/llms/mlx_lm/tuner/utils.py b/llms/mlx_lm/tuner/utils.py
index ab0e470e..0a8a0913 100644
--- a/llms/mlx_lm/tuner/utils.py
+++ b/llms/mlx_lm/tuner/utils.py
@@ -53,9 +53,13 @@ def linear_to_lora_layers(
             Default: ``False``
     """
     if hasattr(model, "backbone"):
-        num_layers = len(model.backbone.layers)
+        layers = model.backbone.layers
+    elif hasattr(model, "layers"):
+        layers = model.layers
     else:
-        num_layers = len(model.layers)
+        raise ValueError("Unsupported model structure")
+
+    num_layers = len(layers)
 
     if num_lora_layers < 0:
         num_lora_layers = num_layers
@@ -143,9 +147,18 @@
                 "self_attn.kv_b_proj",
             ]
         )
+    elif model.model_type == "mamba":
+        keys = set([
+            "mixer.in_proj",
+            "mixer.x_proj",
+            "mixer.dt_proj",
+            "mixer.out_proj",
+        ])
     else:
         raise ValueError(f"Lora does not support {model.model_type}")
 
-    for l in model.layers[num_layers - num_lora_layers :]:
+    # Modified the layer selection to handle both regular and backbone structures:
+    layers = model.backbone.layers if hasattr(model, "backbone") else model.layers
+    for l in layers[num_layers - num_lora_layers :]:
         lora_layers = [(k, to_lora(m)) for k, m in l.named_modules() if k in keys]
         if lora_layers:
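
Note (not part of the patch): the snippet below is a minimal, self-contained sketch of the
layer-selection dispatch this change introduces. MambaStyleModel and TransformerStyleModel
are hypothetical stand-ins for the real mlx_lm model classes, used only to show that the
hasattr-based dispatch picks up layers from either model.backbone.layers or model.layers.

    # Hypothetical stand-ins for the two model layouts handled by the patch:
    # mamba-style models keep their blocks under `model.backbone.layers`,
    # transformer-style models expose them directly as `model.layers`.

    class _Backbone:
        def __init__(self, num_blocks):
            self.layers = [object() for _ in range(num_blocks)]

    class MambaStyleModel:
        def __init__(self, num_blocks):
            self.backbone = _Backbone(num_blocks)

    class TransformerStyleModel:
        def __init__(self, num_blocks):
            self.layers = [object() for _ in range(num_blocks)]

    def select_layers(model):
        # Mirrors the dispatch added to linear_to_lora_layers in the diff above.
        if hasattr(model, "backbone"):
            return model.backbone.layers
        elif hasattr(model, "layers"):
            return model.layers
        raise ValueError("Unsupported model structure")

    if __name__ == "__main__":
        num_lora_layers = 8
        for m in (MambaStyleModel(24), TransformerStyleModel(32)):
            layers = select_layers(m)
            # As in the patched loop, LoRA would be applied to the last `num_lora_layers` blocks.
            print(type(m).__name__, len(layers), len(layers[len(layers) - num_lora_layers:]))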