mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
Add support for deepseek coder v2 lite (#882)
* feat: add support for deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct
* fix softmax + some cleanup
* more nits
* fix rope
* fix original_max_position_embeddings in rope
* fix original_max_position_embeddings in rope config
* add group greedy

---------

Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
@@ -15,7 +15,12 @@ class KVCache:
|
||||
|
||||
def __init__(self, head_dim, n_kv_heads):
|
||||
self.n_kv_heads = n_kv_heads
|
||||
self.head_dim = head_dim
|
||||
if isinstance(head_dim, int):
|
||||
self.k_head_dim = self.v_head_dim = head_dim
|
||||
elif isinstance(head_dim, tuple) and len(head_dim) == 2:
|
||||
self.k_head_dim, self.v_head_dim = head_dim
|
||||
else:
|
||||
raise ValueError("head_dim must be an int or a tuple of two ints")
|
||||
self.keys = None
|
||||
self.values = None
|
||||
self.offset = 0
|
||||
@@ -25,9 +30,10 @@ class KVCache:
|
||||
prev = self.offset
|
||||
if self.keys is None or (prev + keys.shape[2]) > self.keys.shape[2]:
|
||||
n_steps = (self.step + keys.shape[2] - 1) // self.step
|
||||
shape = (1, self.n_kv_heads, n_steps * self.step, self.head_dim)
|
||||
new_k = mx.zeros(shape, keys.dtype)
|
||||
new_v = mx.zeros(shape, values.dtype)
|
||||
k_shape = (1, self.n_kv_heads, n_steps * self.step, self.k_head_dim)
|
||||
v_shape = (1, self.n_kv_heads, n_steps * self.step, self.v_head_dim)
|
||||
new_k = mx.zeros(k_shape, keys.dtype)
|
||||
new_v = mx.zeros(v_shape, values.dtype)
|
||||
if self.keys is not None:
|
||||
if prev % self.step != 0:
|
||||
self.keys = self.keys[..., :prev, :]
|
||||
|
Reference in New Issue
Block a user