Lazy import + refactor Lora layer addition (#426)

* lazy model import in mlx_lm
* change lora loading
* fix olmo lora
* remove a bunch of unused stuff from plamo
* move phixtral to mlx-lm and out of llms/
2025-12-16 02:08:55 +08:00 · 2024-02-12 10:51:02 -08:00
parent 4576946151
commit d4666615bb
15 changed files with 127 additions and 393 deletions
--- a/llms/mlx_lm/models/olmo.py
+++ b/llms/mlx_lm/models/olmo.py
@@ -7,18 +7,25 @@ import mlx.nn as nn

 from .base import BaseModelArgs

+try:
+    import hf_olmo
+except ImportError:
+    print("To run olmo install ai2-olmo: pip install ai2-olmo")
+    exit(1)
+

@dataclass
 class ModelArgs(BaseModelArgs):
+    model_type: str
    d_model: int
    n_layers: int
    mlp_hidden_size: int
    n_heads: int
    vocab_size: int
    embedding_size: int
+    model_type: str
    rope_theta: float = 10000
    rope_traditional: bool = False
-    model_type: str = None
    mlp_ratio: int = 4
    weight_tying: bool = False

@@ -162,11 +169,7 @@ class OlmoModel(nn.Module):
 class Model(nn.Module):
    def __init__(self, args: ModelArgs):
        super().__init__()
-        try:
-            import hf_olmo
-        except ImportError:
-            print("To run olmo install ai2-olmo: pip install ai2-olmo")
-            exit(1)
+        self.model_type = args.model_type
        self.model = OlmoModel(args)

    def __call__(