Lazy import + refactor Lora layer addition (#426)

* lazy model import in mlx_lm * change lora loading * fix olmo lora * remove a bunch of unused stuff from plamo * move phixtral to mlx-lm and out of llms/
2025-12-16 02:08:55 +08:00 · 2024-02-12 10:51:02 -08:00
parent 4576946151
commit d4666615bb
15 changed files with 127 additions and 393 deletions
--- a/llms/mlx_lm/models/qwen.py
+++ b/llms/mlx_lm/models/qwen.py
@@ -9,6 +9,7 @@ from .base import BaseModelArgs

@dataclass
 class ModelArgs(BaseModelArgs):
+    model_type: str
    hidden_size: int = 2048
    num_attention_heads: int = 16
    num_hidden_layers: int = 24
@@ -160,6 +161,7 @@ class QwenModel(nn.Module):
 class Model(nn.Module):
    def __init__(self, config: ModelArgs):
        super().__init__()
+        self.model_type = config.model_type
        self.transformer = QwenModel(config)
        self.lm_head = nn.Linear(
            config.hidden_size, config.vocab_size, bias=not config.no_bias