From 5de7c2ac330633f08341bc2604266908cb49872f Mon Sep 17 00:00:00 2001
From: Muhtasham Oblokulov <muhtasham97@gmail.com>
Date: Wed, 6 Mar 2024 02:43:15 +0100
Subject: [PATCH] Add tips on porting LLMs from HuggingFace (#523)

* Add tips on porting LLMs from HuggingFace

* Add CONTRIBUTING.md  to mlx-examples-llms

* Refactor imports and update comment in starcoder2.py

* Update llms/mlx_lm/models/starcoder2.py

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>

* nits

* nits

---------

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>
Co-authored-by: Awni Hannun <awni@apple.com>
---
 CONTRIBUTING.md                  |  4 ++--
 llms/CONTRIBUTING.md             | 38 ++++++++++++++++++++++++++++++++
 llms/mlx_lm/models/starcoder2.py |  5 ++---
 3 files changed, 42 insertions(+), 5 deletions(-)
 create mode 100644 llms/CONTRIBUTING.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 58a10626..233e4ea9 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,11 +14,11 @@ possible.
  
    You can also run the formatters manually as follows:
  
-     ```
+     ```bash
      clang-format -i file.cpp
      ```
  
-     ```
+     ```bash
      black file.py
      ```
  
diff --git a/llms/CONTRIBUTING.md b/llms/CONTRIBUTING.md
new file mode 100644
index 00000000..e3590f4e
--- /dev/null
+++ b/llms/CONTRIBUTING.md
@@ -0,0 +1,38 @@
+# Contributing to MLX LM 
+
+Below are some tips to port LLMs available on Hugging Face to MLX.
+
+Before starting, check out the [general contribution
+guidelines](https://github.com/ml-explore/mlx-examples/blob/main/CONTRIBUTING.md).
+
+Next, from this directory, do an editable install:
+
+```shell
+pip install -e .
+```
+
+Then check if the model has weights in the
+[safetensors](https://huggingface.co/docs/safetensors/index) format. If not,
+[follow the instructions](https://huggingface.co/spaces/safetensors/convert) to
+convert it.
+
+After that, add the model file to the
+[`mlx_lm/models`](https://github.com/ml-explore/mlx-examples/tree/main/llms/mlx_lm/models)
+directory. You can see other examples there. We recommend starting from a model
+that is similar to the model you are porting.
+
+Make sure the name of the new model file is the same as the `model_type` in the
+`config.json`, for example
+[starcoder2](https://huggingface.co/bigcode/starcoder2-7b/blob/main/config.json#L17).
+
+To determine the model layer names, we suggest one of the following:
+
+- Refer to the Transformers implementation if you are familiar with the
+  codebase.
+- Load the model weights and check the weight names, which will tell you about
+  the model structure.
+- Look at the names of the weights by inspecting `model.safetensors.index.json`
+  in the Hugging Face repo.
+
+To add LoRA support, edit
+[`mlx_lm/tuner/utils.py`](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/tuner/utils.py#L27-L60).
diff --git a/llms/mlx_lm/models/starcoder2.py b/llms/mlx_lm/models/starcoder2.py
index c0e32412..7a431800 100644
--- a/llms/mlx_lm/models/starcoder2.py
+++ b/llms/mlx_lm/models/starcoder2.py
@@ -1,6 +1,5 @@
-import math
 from dataclasses import dataclass
-from typing import Dict, Optional, Tuple, Union
+from typing import Optional, Tuple
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -158,7 +157,7 @@ class Model(nn.Module):
         super().__init__()
         self.model_type = args.model_type
         self.model = Starcoder2Model(args)
-        # This is for 15B starcoder2 since it doesn't tie word embeddings
+        # For 15B starcoder2 and fine-tuned models which don't tie word embeddings
         if not args.tie_word_embeddings:
             self.lm_head = nn.Linear(args.hidden_size, args.vocab_size, bias=False)