From 10853b57d9a5a8b696ac3d0ad9bc71e1b0d15c6f Mon Sep 17 00:00:00 2001
From: JosefAlbers <146810011+JosefAlbers@users.noreply.github.com>
Date: Sat, 11 May 2024 02:13:34 +0900
Subject: [PATCH] Add `model_config` parameter to `load()` and `load_model()`
 (#770)

* Add `model_config` parameter to `load()` and `load_model()`

For easy editing of the loaded model configuration (e.g., for changing RoPE
theta or scaling of Phi-3 model)

Example:

```python
from mlx_lm import load, generate

model, tokenizer = load(
    "mlx-community/Phi-3-mini-4k-instruct-4bit-no-q-embed",
    model_config={"rope_theta": 50000.0},
)
response = generate(model, tokenizer, prompt, max_tokens=MAX_TOKENS)
```

* Possible bug (default_loss)

* Revert "Possible bug (default_loss)"

This reverts commit 70a55ace1847f545300d9e62df835f65386f4fc0.

* Fix default_loss for lora

* 1. move load_model's new optional `model_config` arg to the end
  (fetch_from_hub()'s `model = load_model(model_path, lazy)`)

  2. fix indentations (`black` hook)
---
 llms/mlx_lm/utils.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index b3667609..305cf518 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -299,7 +299,11 @@ def load_config(model_path: Path) -> dict:
     return config
 
 
-def load_model(model_path: Path, lazy: bool = False) -> nn.Module:
+def load_model(
+    model_path: Path,
+    lazy: bool = False,
+    model_config: dict = {},
+) -> nn.Module:
     """
     Load and initialize the model from a given path.
 
@@ -308,6 +312,8 @@ def load_model(model_path: Path, lazy: bool = False) -> nn.Module:
         lazy (bool): If False eval the model parameters to make sure they are
             loaded in memory before returning, otherwise they will be loaded
             when needed. Default: ``False``
+        model_config(dict, optional): Configuration parameters for the model.
+            Defaults to an empty dictionary.
 
     Returns:
         nn.Module: The loaded and initialized model.
@@ -318,6 +324,7 @@ def load_model(model_path: Path, lazy: bool = False) -> nn.Module:
     """
 
     config = load_config(model_path)
+    config.update(model_config)
 
     weight_files = glob.glob(str(model_path / "model*.safetensors"))
 
@@ -365,6 +372,7 @@ def load_model(model_path: Path, lazy: bool = False) -> nn.Module:
 def load(
     path_or_hf_repo: str,
     tokenizer_config={},
+    model_config={},
     adapter_path: Optional[str] = None,
     lazy: bool = False,
 ) -> Tuple[nn.Module, TokenizerWrapper]:
@@ -375,6 +383,8 @@ def load(
         path_or_hf_repo (Path): The path or the huggingface repository to load the model from.
         tokenizer_config (dict, optional): Configuration parameters specifically for the tokenizer.
             Defaults to an empty dictionary.
+        model_config(dict, optional): Configuration parameters specifically for the model.
+            Defaults to an empty dictionary.
         adapter_path (str, optional): Path to the LoRA adapters. If provided, applies LoRA
             layers to the model. Default: ``None``.
         lazy (bool): If False eval the model parameters to make sure they are
@@ -389,7 +399,7 @@ def load(
     """
     model_path = get_model_path(path_or_hf_repo)
 
-    model = load_model(model_path, lazy)
+    model = load_model(model_path, lazy, model_config)
     if adapter_path is not None:
         model = apply_lora_layers(model, adapter_path)
         model.eval()
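
Note (outside the patch itself): a minimal sketch of how the new argument reaches the model when calling `load_model()` directly on a local checkpoint directory. Per the diff, `load_config()` reads the stored `config.json` and `config.update(model_config)` then applies the caller's overrides before the model is instantiated. The local path and the `rope_theta` value below are hypothetical, shown only to illustrate the call; use your own checkpoint and overrides.

```python
# Sketch only, assuming the patched mlx_lm is installed and a checkpoint has
# already been downloaded locally; the path and override value are hypothetical.
from pathlib import Path

from mlx_lm.utils import load_model

model = load_model(
    Path("path/to/Phi-3-mini-4k-instruct-4bit-no-q-embed"),  # dir containing config.json and *.safetensors
    lazy=False,                            # eval parameters now rather than on first use
    model_config={"rope_theta": 50000.0},  # merged over config.json via config.update(model_config)
)
```

For the higher-level path, the commit message's own example applies: `load()` simply forwards `model_config` to `load_model()`, so overrides passed there behave the same way.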