From e89a1316688d131f31f11ae6caaecb68fe0e3b74 Mon Sep 17 00:00:00 2001
From: Sindhu Satish
Date: Wed, 29 Jan 2025 05:53:18 -0800
Subject: [PATCH 1/5] Include revision version for HF models while loading

---
 llms/mlx_lm/lora.py  | 9 ++++++++-
 llms/mlx_lm/utils.py | 5 +++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index 43f508c3..7ebfb100 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -73,6 +73,13 @@ def build_parser():
         help="The path to the local model directory or Hugging Face repo.",
     )
 
+    parser.add_argument(
+        "--revision",
+        default="main",
+        type=str,
+        help="Hash value of the commit to checkout from the Hugging Face repo.",
+    )
+
     # Training args
     parser.add_argument(
         "--train",
@@ -252,7 +259,7 @@ def run(args, training_callback: TrainingCallback = None):
     np.random.seed(args.seed)
 
     print("Loading pretrained model")
-    model, tokenizer = load(args.model)
+    model, tokenizer = load(args.model, args.revision)
 
     print("Loading datasets")
     train_set, valid_set, test_set = load_dataset(args, tokenizer)
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 0150f1b7..03d22626 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -152,7 +152,7 @@ def compute_bits_per_weight(model):
     return model_bytes * 8 / model_params
 
 
-def get_model_path(path_or_hf_repo: str, revision: Optional[str] = None) -> Path:
+def get_model_path(path_or_hf_repo: str, revision: Optional[str] = "main") -> Path:
     """
     Ensures the model is available locally. If the path does not exist locally,
     it is downloaded from the Hugging Face Hub.
@@ -184,7 +184,7 @@ def get_model_path(path_or_hf_repo: str, revision: Optional[str] = None) -> Path
         )
     except:
         raise ModelNotFoundError(
-            f"Model not found for path or HF repo: {path_or_hf_repo}.\n"
+            f"Model not found for path or HF repo: {path_or_hf_repo}:{revision}.\n"
             "Please make sure you specified the local path or Hugging Face"
             " repo id correctly.\nIf you are trying to access a private or"
             " gated Hugging Face repo, make sure you are authenticated:\n"
@@ -709,6 +709,7 @@ def load(
     model_config={},
     adapter_path: Optional[str] = None,
     lazy: bool = False,
+    commit_hash: Optional[str] = "main",
 ) -> Tuple[nn.Module, TokenizerWrapper]:
     """
     Load the model and tokenizer from a given path or a huggingface repository.

From dd1690df81931c8472fb7fa028ff897a9efb1329 Mon Sep 17 00:00:00 2001
From: Sindhu Satish
Date: Wed, 29 Jan 2025 06:00:06 -0800
Subject: [PATCH 2/5] bug fix

---
 llms/mlx_lm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 03d22626..8e48ab25 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -709,7 +709,7 @@ def load(
     model_config={},
     adapter_path: Optional[str] = None,
     lazy: bool = False,
-    commit_hash: Optional[str] = "main",
+    revision: Optional[str] = "main",
 ) -> Tuple[nn.Module, TokenizerWrapper]:
     """
     Load the model and tokenizer from a given path or a huggingface repository.
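
Usage sketch for patches 1/5 and 2/5 (illustrative, not part of the series):
after these two changes a Hugging Face revision (branch, tag, or commit hash)
can be pinned when loading. The repo id below is a placeholder, and revision
should be passed by keyword, since it sits after tokenizer_config and
model_config in load()'s signature.

    # Minimal sketch, assuming the patched mlx_lm is importable and the
    # placeholder repo id "some-org/some-model" exists on the Hub.
    from mlx_lm.utils import get_model_path, load

    # get_model_path already accepted a revision before this series;
    # patch 1/5 only changes its default from None to "main".
    path = get_model_path("some-org/some-model", revision="main")

    # load() gains the keyword (commit_hash in patch 1/5, renamed to
    # revision in patch 2/5):
    model, tokenizer = load("some-org/some-model", revision="main")

    # CLI equivalent via the --revision flag added to lora.py:
    #   python -m mlx_lm.lora --train --model some-org/some-model \
    #       --revision <commit-sha>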
From b0520e770880e639d9a25e852e6c20027b51680d Mon Sep 17 00:00:00 2001
From: Sindhu Satish
Date: Wed, 29 Jan 2025 06:21:36 -0800
Subject: [PATCH 3/5] Bug fix - Qwen2 support

---
 llms/mlx_lm/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 8e48ab25..96be6d29 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -43,6 +43,7 @@ MODEL_REMAPPING = {
     "mistral": "llama",  # mistral is compatible with llama
     "phi-msft": "phixtral",
     "falcon_mamba": "mamba",
+    "qwen2": "qwen2",
 }
 
 MAX_FILE_SIZE_GB = 5

From ba6c7d3aba506c5f03152fe42992489171e4f444 Mon Sep 17 00:00:00 2001
From: Sindhu Satish
Date: Wed, 29 Jan 2025 07:30:11 -0800
Subject: [PATCH 4/5] Qwen2 support

---
 llms/mlx_lm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 96be6d29..68d2204e 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -43,7 +43,7 @@ MODEL_REMAPPING = {
     "mistral": "llama",  # mistral is compatible with llama
     "phi-msft": "phixtral",
     "falcon_mamba": "mamba",
-    "qwen2": "qwen2",
+    "qwen2": "qwen2"
 }
 
 MAX_FILE_SIZE_GB = 5

From ec06c04f4fb81553ac47f9a6fc871dc6ee911869 Mon Sep 17 00:00:00 2001
From: Sindhu Satish
Date: Wed, 29 Jan 2025 07:37:58 -0800
Subject: [PATCH 5/5] revert revision changes and retain qwen2 support

---
 llms/mlx_lm/lora.py  | 11 ++---------
 llms/mlx_lm/utils.py |  7 +++----
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/llms/mlx_lm/lora.py b/llms/mlx_lm/lora.py
index 7ebfb100..fc004ae2 100644
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -73,13 +73,6 @@ def build_parser():
         help="The path to the local model directory or Hugging Face repo.",
     )
 
-    parser.add_argument(
-        "--revision",
-        default="main",
-        type=str,
-        help="Hash value of the commit to checkout from the Hugging Face repo.",
-    )
-
     # Training args
     parser.add_argument(
         "--train",
@@ -259,7 +252,7 @@ def run(args, training_callback: TrainingCallback = None):
     np.random.seed(args.seed)
 
     print("Loading pretrained model")
-    model, tokenizer = load(args.model, args.revision)
+    model, tokenizer = load(args.model)
 
     print("Loading datasets")
     train_set, valid_set, test_set = load_dataset(args, tokenizer)
@@ -303,4 +296,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 68d2204e..6007adb0 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -153,7 +153,7 @@ def compute_bits_per_weight(model):
     return model_bytes * 8 / model_params
 
 
-def get_model_path(path_or_hf_repo: str, revision: Optional[str] = "main") -> Path:
+def get_model_path(path_or_hf_repo: str, revision: Optional[str] = None) -> Path:
     """
     Ensures the model is available locally. If the path does not exist locally,
     it is downloaded from the Hugging Face Hub.
@@ -185,7 +185,7 @@ def get_model_path(path_or_hf_repo: str, revision: Optional[str] = "main") -> Pa
         )
     except:
         raise ModelNotFoundError(
-            f"Model not found for path or HF repo: {path_or_hf_repo}:{revision}.\n"
+            f"Model not found for path or HF repo: {path_or_hf_repo}.\n"
             "Please make sure you specified the local path or Hugging Face"
             " repo id correctly.\nIf you are trying to access a private or"
             " gated Hugging Face repo, make sure you are authenticated:\n"
@@ -710,7 +710,6 @@ def load(
     model_config={},
     adapter_path: Optional[str] = None,
     lazy: bool = False,
-    revision: Optional[str] = "main",
 ) -> Tuple[nn.Module, TokenizerWrapper]:
     """
     Load the model and tokenizer from a given path or a huggingface repository.
@@ -1028,4 +1027,4 @@ def convert(
     save_config(config, config_path=mlx_path / "config.json")
 
     if upload_repo is not None:
-        upload_to_hub(mlx_path, upload_repo, hf_path)
+        upload_to_hub(mlx_path, upload_repo, hf_path)
\ No newline at end of file
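
Background sketch for the MODEL_REMAPPING entry kept by patches 3/5 through
5/5 (simplified; the function name below is illustrative and the dict is only
a subset of the real table): MODEL_REMAPPING maps a checkpoint's model_type
onto the mlx_lm architecture module that implements it, and unmapped types
fall back to themselves, so an identity entry such as "qwen2" resolves the
same way the fallback already did.

    # Simplified resolution logic: remap model_type, then import the
    # matching architecture module from mlx_lm.models.
    import importlib

    MODEL_REMAPPING = {
        "mistral": "llama",      # mistral reuses the llama implementation
        "falcon_mamba": "mamba",
        "qwen2": "qwen2",        # identity entry: same result as the fallback
    }

    def get_model_module(model_type: str):
        # Unmapped types fall back to the raw model_type, which is why
        # qwen2 checkpoints resolved even before this entry existed.
        arch = MODEL_REMAPPING.get(model_type, model_type)
        return importlib.import_module(f"mlx_lm.models.{arch}")

    # e.g. get_model_module("qwen2") imports mlx_lm.models.qwen2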