refactor: merge deepseek coder example into hf_llm example (#234)

* refactor: merge deepseek coder example into hf_llm example

* remove deepseek example

* chore: fix format in readme

* chore: remove default rope_scaling dict and use get to access type and factor to avoid key error

* Update llms/hf_llm/models.py

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>

* chore: fix lint

---------

Co-authored-by: Awni Hannun <awni.hannun@gmail.com>
Author: Anchen
Date: 2024-01-06 07:53:46 -08:00
Committed by: GitHub
Parent: cf0ad26a89
Commit: 758f05c09a
7 changed files with 22 additions and 527 deletions
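One of the commit bullets above describes reading `type` and `factor` with `get` to avoid a `KeyError`. A minimal sketch of that pattern, assuming a hypothetical `config` dict (illustration only, not code from this diff):

```python
# Hypothetical config dict; real values come from a model's config.json.
config = {"rope_scaling": {"type": "linear", "factor": 4.0}}

rope_scaling = config.get("rope_scaling")  # None if the key is absent
if rope_scaling is not None and rope_scaling.get("type") == "linear":
    scale = 1 / rope_scaling.get("factor", 1.0)  # .get avoids KeyError
else:
    scale = 1.0  # no scaling when absent or unsupported

print(scale)  # 0.25 for a linear factor of 4.0
```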

llms/hf_llm/README.md

@@ -45,6 +45,8 @@ Here are a few examples of Hugging Face models which work with this example:
 - [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)
 - [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T)
+- [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct)
+- [01-ai/Yi-6B-Chat](https://huggingface.co/01-ai/Yi-6B-Chat)
 
 Most
 [Mistral](https://huggingface.co/models?library=transformers,safetensors&other=mistral&sort=trending)

llms/hf_llm/models.py

@@ -5,7 +5,7 @@ import inspect
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Dict, Optional, Tuple, Union
 
 import mlx.core as mx
 import mlx.nn as nn
@@ -26,11 +26,20 @@ class ModelArgs:
     rope_theta: float = 10000
     rope_traditional: bool = False
     model_type: str = None
+    rope_scaling: Optional[Dict[str, Union[float, str]]] = None
 
     def __post_init__(self):
         if self.num_key_value_heads is None:
             self.num_key_value_heads = self.num_attention_heads
 
+        if self.rope_scaling:
+            required_keys = {"factor", "type"}
+            if not all(key in self.rope_scaling for key in required_keys):
+                raise ValueError(f"rope_scaling must contain keys {required_keys}")
+
+            if self.rope_scaling["type"] != "linear":
+                raise ValueError("rope_scaling 'type' currently only supports 'linear'")
+
     @classmethod
     def from_dict(cls, params):
         return cls(
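To see what the new `__post_init__` checks accept and reject, here is a standalone sketch that lifts the same validation out of the dataclass; `validate_rope_scaling` is a hypothetical helper, not part of the diff:

```python
from typing import Dict, Optional, Union


def validate_rope_scaling(rope_scaling: Optional[Dict[str, Union[float, str]]]):
    # Same checks as ModelArgs.__post_init__ above.
    if rope_scaling:
        required_keys = {"factor", "type"}
        if not all(key in rope_scaling for key in required_keys):
            raise ValueError(f"rope_scaling must contain keys {required_keys}")
        if rope_scaling["type"] != "linear":
            raise ValueError("rope_scaling 'type' currently only supports 'linear'")


validate_rope_scaling(None)                                # ok: no scaling configured
validate_rope_scaling({"type": "linear", "factor": 4.0})   # ok
# validate_rope_scaling({"type": "dynamic", "factor": 2})  # raises: unsupported type
# validate_rope_scaling({"type": "linear"})                # raises: missing "factor"
```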
@@ -73,8 +82,16 @@ class Attention(nn.Module):
         self.k_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
         self.v_proj = nn.Linear(dim, n_kv_heads * head_dim, bias=False)
         self.o_proj = nn.Linear(n_heads * head_dim, dim, bias=False)
+        rope_scale = (
+            1 / args.rope_scaling["factor"]
+            if args.rope_scaling is not None and args.rope_scaling["type"] == "linear"
+            else 1
+        )
         self.rope = nn.RoPE(
-            head_dim, traditional=args.rope_traditional, base=args.rope_theta
+            head_dim,
+            traditional=args.rope_traditional,
+            base=args.rope_theta,
+            scale=rope_scale,
         )
 
     def __call__(
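The effect of the new `scale` argument can be tried in isolation. A minimal sketch, assuming mlx is installed; `head_dim` and the rope_scaling values here are hypothetical:

```python
import mlx.core as mx
import mlx.nn as nn

head_dim = 64  # hypothetical; the real value comes from the model config
rope_scaling = {"type": "linear", "factor": 4.0}

# Same expression as in Attention above: a linear factor of 4.0 becomes
# a position scale of 0.25, stretching the usable context window 4x.
rope_scale = (
    1 / rope_scaling["factor"]
    if rope_scaling is not None and rope_scaling["type"] == "linear"
    else 1
)

rope = nn.RoPE(head_dim, traditional=False, base=10000, scale=rope_scale)
x = mx.zeros((1, 8, head_dim))  # (batch, sequence length, head_dim)
y = rope(x)                     # positions are multiplied by rope_scale
print(rope_scale)               # 0.25
```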

llms/hf_llm/requirements.txt

@@ -1,3 +1,4 @@
 mlx>=0.0.7
 numpy
 transformers
+protobuf