Mirror of https://github.com/ml-explore/mlx-examples.git
refactor: merge deepseek coder example into hf_llm example (#234)
* refactor: merge deepseek coder example into hf_llm example
* remove deepseek example
* chore: fix format in readme
* chore: remove default rope_scaling dict and use get to access type and factor to avoid key error
* Update llms/hf_llm/models.py

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>

* chore: fix lint

---------

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>
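One bullet above mentions using `get` to read `type` and `factor` so a malformed config does not raise a `KeyError`. As a generic illustration of that dictionary-access pattern (the merged code in the diff below ultimately validates the keys up front instead):

```python
# Plain-dict access patterns for an optional rope_scaling config.
cfg = {"factor": 2.0}  # malformed: the "type" key is missing

# cfg["type"] would raise KeyError; .get returns None instead.
print(cfg.get("type"))         # None -> the caller can fall back gracefully
print(cfg.get("factor", 1.0))  # 2.0 -> the default is used only when the key is absent
```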
llms/hf_llm/README.md

@@ -45,6 +45,8 @@ Here are a few examples of Hugging Face models which work with this example:
 - [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)
 - [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T)
+- [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct)
+- [01-ai/Yi-6B-Chat](https://huggingface.co/01-ai/Yi-6B-Chat)
 
 Most [Mistral](https://huggingface.co/models?library=transformers,safetensors&other=mistral&sort=trending)
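All of the listed repos are llama-style decoder-only models, which is what this example's `models.py` implements. As a hedged sketch (not part of the repo), one way to check whether a Hugging Face checkpoint carries the `model_type` and `rope_scaling` fields the diffs below consume, without downloading any weights:

```python
# Illustrative only: inspect the config fields this example cares about.
# Requires the transformers package and network access to the Hub.
from transformers import AutoConfig

for repo in [
    "mistralai/Mistral-7B-v0.1",
    "deepseek-ai/deepseek-coder-6.7b-instruct",
]:
    cfg = AutoConfig.from_pretrained(repo)
    # rope_scaling is absent on models that don't use context extension.
    print(repo, cfg.model_type, getattr(cfg, "rope_scaling", None))
```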
llms/hf_llm/models.py

@@ -5,7 +5,7 @@ import inspect
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Dict, Optional, Tuple, Union
 
 import mlx.core as mx
 import mlx.nn as nn
llms/hf_llm/models.py

@@ -26,11 +26,20 @@ class ModelArgs:
     rope_theta: float = 10000
     rope_traditional: bool = False
     model_type: str = None
+    rope_scaling: Optional[Dict[str, Union[float, str]]] = None
 
     def __post_init__(self):
         if self.num_key_value_heads is None:
             self.num_key_value_heads = self.num_attention_heads
 
+        if self.rope_scaling:
+            required_keys = {"factor", "type"}
+            if not all(key in self.rope_scaling for key in required_keys):
+                raise ValueError(f"rope_scaling must contain keys {required_keys}")
+
+            if self.rope_scaling["type"] != "linear":
+                raise ValueError("rope_scaling 'type' currently only supports 'linear'")
+
     @classmethod
     def from_dict(cls, params):
         return cls(
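For a self-contained view of what the new `__post_init__` check accepts and rejects, here is a trimmed-down stand-in dataclass; only the `rope_scaling` field is copied from the real `ModelArgs`, everything else is omitted for brevity:

```python
from dataclasses import dataclass
from typing import Dict, Optional, Union


@dataclass
class _RopeArgs:  # illustrative stand-in for ModelArgs
    rope_scaling: Optional[Dict[str, Union[float, str]]] = None

    def __post_init__(self):
        # Same validation as the hunk above: both keys must be present,
        # and only linear scaling is supported.
        if self.rope_scaling:
            required_keys = {"factor", "type"}
            if not all(key in self.rope_scaling for key in required_keys):
                raise ValueError(f"rope_scaling must contain keys {required_keys}")
            if self.rope_scaling["type"] != "linear":
                raise ValueError("rope_scaling 'type' currently only supports 'linear'")


_RopeArgs(rope_scaling={"type": "linear", "factor": 4.0})  # accepted
_RopeArgs(rope_scaling=None)                               # accepted: scaling is optional
# _RopeArgs(rope_scaling={"type": "dynamic", "factor": 4.0})  # would raise ValueError
```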
llms/hf_llm/models.py

@@ -73,8 +82,16 @@ class Attention(nn.Module):
         self.k_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
         self.v_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
         self.o_proj = nn.Linear(n_heads * head_dim, dim, bias=False)
+        rope_scale = (
+            1 / args.rope_scaling["factor"]
+            if args.rope_scaling is not None and args.rope_scaling["type"] == "linear"
+            else 1
+        )
         self.rope = nn.RoPE(
-            head_dim, traditional=args.rope_traditional, base=args.rope_theta
+            head_dim,
+            traditional=args.rope_traditional,
+            base=args.rope_theta,
+            scale=rope_scale,
         )
 
     def __call__(
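The `scale` passed to `nn.RoPE` is the reciprocal of the linear-scaling factor: position indices are compressed by `1/factor`, so a model fine-tuned for a longer context keeps its original rotary frequencies. A small standalone check of that arithmetic (the 4.0 factor is an illustrative value, not read from any config):

```python
# Mirrors the rope_scale expression from the hunk above.
def rope_scale(rope_scaling):
    if rope_scaling is not None and rope_scaling["type"] == "linear":
        return 1 / rope_scaling["factor"]
    return 1


assert rope_scale(None) == 1                                  # no scaling configured
assert rope_scale({"type": "linear", "factor": 4.0}) == 0.25  # positions compressed 4x
```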
llms/hf_llm/requirements.txt

@@ -1,3 +1,4 @@
 mlx>=0.0.7
 numpy
 transformers
+protobuf
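The `protobuf` pin is presumably needed because some of the newly listed checkpoints ship sentencepiece tokenizers that `transformers` converts through protobuf; whether it gets exercised depends on the tokenizer, so treat this smoke test as an assumption rather than repo documentation:

```python
# If protobuf is missing, sentencepiece-backed tokenizers can fail to load
# with an ImportError naming it; the requirements pin above avoids that.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-6.7b-instruct")
print(tok.encode("def fib(n):"))
```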