From 5de7c2ac330633f08341bc2604266908cb49872f Mon Sep 17 00:00:00 2001 From: Muhtasham Oblokulov Date: Wed, 6 Mar 2024 02:43:15 +0100 Subject: [PATCH] Add tips on porting LLMs from HuggingFace (#523) * Add tips on porting LLMs from HuggingFace * Add CONTRIBUTING.md to mlx-examples-llms * Refactor imports and update comment in starcoder2.py * Update llms/mlx_lm/models/starcoder2.py Co-authored-by: Awni Hannun * nits * nits --------- Co-authored-by: Awni Hannun Co-authored-by: Awni Hannun --- CONTRIBUTING.md | 4 ++-- llms/CONTRIBUTING.md | 38 ++++++++++++++++++++++++++++++++ llms/mlx_lm/models/starcoder2.py | 5 ++--- 3 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 llms/CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 58a10626..233e4ea9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,11 +14,11 @@ possible. You can also run the formatters manually as follows: - ``` + ```bash clang-format -i file.cpp ``` - ``` + ```bash black file.py ``` diff --git a/llms/CONTRIBUTING.md b/llms/CONTRIBUTING.md new file mode 100644 index 00000000..e3590f4e --- /dev/null +++ b/llms/CONTRIBUTING.md @@ -0,0 +1,38 @@ +# Contributing to MLX LM + +Below are some tips to port LLMs available on Hugging Face to MLX. + +Before starting, check out the [general contribution +guidelines](https://github.com/ml-explore/mlx-examples/blob/main/CONTRIBUTING.md). + +Next, from this directory, do an editable install: + +```shell +pip install -e . +``` + +Then check if the model has weights in the +[safetensors](https://huggingface.co/docs/safetensors/index) format. If not, +[follow the instructions](https://huggingface.co/spaces/safetensors/convert) to +convert it. + +After that, add the model file to the +[`mlx_lm/models`](https://github.com/ml-explore/mlx-examples/tree/main/llms/mlx_lm/models) +directory. You can see other examples there. We recommend starting from a model +that is similar to the model you are porting.
+ +Make sure the name of the new model file is the same as the `model_type` in the +`config.json`, for example +[starcoder2](https://huggingface.co/bigcode/starcoder2-7b/blob/main/config.json#L17). + +To determine the model layer names, we suggest one of the following: + +- Refer to the Transformers implementation if you are familiar with the + codebase. +- Load the model weights and check the weight names, which will tell you about + the model structure. +- Look at the names of the weights by inspecting `model.safetensors.index.json` + in the Hugging Face repo. + +To add LoRA support, edit +[`mlx_lm/tuner/utils.py`](https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/tuner/utils.py#L27-L60). diff --git a/llms/mlx_lm/models/starcoder2.py b/llms/mlx_lm/models/starcoder2.py index c0e32412..7a431800 100644 --- a/llms/mlx_lm/models/starcoder2.py +++ b/llms/mlx_lm/models/starcoder2.py @@ -1,6 +1,5 @@ -import math from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple import mlx.core as mx import mlx.nn as nn @@ -158,7 +157,7 @@ class Model(nn.Module): super().__init__() self.model_type = args.model_type self.model = Starcoder2Model(args) - # This is for 15B starcoder2 since it doesn't tie word embeddings + # For 15B starcoder2 and fine-tuned models which don't tie word embeddings if not args.tie_word_embeddings: self.lm_head = nn.Linear(args.hidden_size, args.vocab_size, bias=False)