mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-10-23 05:58:07 +08:00
add speculative decoding example for llama (#149)
* speculative decoding
* add sample 0
* spec decode gives same results as regular decode
* rebase
* use accept reject criteria
* switch to t5
* update readme
* readme nit
* nits
* nits
* nits

---------

Co-authored-by: Benjamin Anderson <benjamin@Benjamins-MBP.lan>
Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:

committed by
GitHub

parent
07c163d9d9
commit
09566c7257
1
t5/t5.py
1
t5/t5.py
@@ -125,7 +125,6 @@ class MultiHeadAttention(nn.Module):
             values = mx.concatenate([value_cache, values], axis=2)
 
         # Dimensions are [batch x num heads x sequence x hidden dim]
-        queries = queries
         scores = queries @ keys
         if mask is not None:
             scores = scores + mask.astype(scores.dtype)
Reference in New Issue
Block a user