mirror of
https://github.com/ml-explore/mlx.git
synced 2025-09-20 12:08:14 +08:00
Added ALiBi implementation (#232)
This commit is contained in:
@@ -37,7 +37,7 @@ from mlx.nn.layers.dropout import Dropout
|
||||
from mlx.nn.layers.embedding import Embedding
|
||||
from mlx.nn.layers.linear import Linear
|
||||
from mlx.nn.layers.normalization import GroupNorm, LayerNorm, RMSNorm
|
||||
from mlx.nn.layers.positional_encoding import RoPE, SinusoidalPositionalEncoding
|
||||
from mlx.nn.layers.positional_encoding import ALiBi, RoPE, SinusoidalPositionalEncoding
|
||||
from mlx.nn.layers.quantized import QuantizedLinear
|
||||
from mlx.nn.layers.transformer import (
|
||||
MultiHeadAttention,
|
||||
|
@@ -142,3 +142,40 @@ class SinusoidalPositionalEncoding(Module):
|
||||
y = y * self.scale
|
||||
|
||||
return y
|
||||
|
||||
|
||||
class ALiBi(Module):
|
||||
@staticmethod
|
||||
def create_alibi_matrix(
|
||||
q_sequence_length: int,
|
||||
k_sequence_length: int,
|
||||
num_heads: int,
|
||||
offset: int,
|
||||
dtype=mx.float32,
|
||||
):
|
||||
x1 = mx.arange(offset, q_sequence_length)
|
||||
x2 = mx.arange(0, k_sequence_length)
|
||||
distance_matrix = -mx.abs(
|
||||
mx.expand_dims(x1[:, None] - x2[None, :], axis=(0, 1))
|
||||
)
|
||||
alibi_slope = ALiBi.create_alibi_slope(num_heads=num_heads)
|
||||
alibi_mask = (distance_matrix * alibi_slope).astype(dtype)
|
||||
return alibi_mask
|
||||
|
||||
@staticmethod
|
||||
def create_alibi_slope(num_heads):
|
||||
x = (2**8) ** (1 / num_heads)
|
||||
out = mx.power(x, -mx.arange(1, num_heads + 1))
|
||||
return mx.expand_dims(out, axis=(-1, -2))
|
||||
|
||||
def __call__(self, attention_scores, offset=0, mask=None):
|
||||
alibi_mask = ALiBi.create_alibi_matrix(
|
||||
q_sequence_length=attention_scores.shape[-2] + offset,
|
||||
k_sequence_length=attention_scores.shape[-1],
|
||||
num_heads=attention_scores.shape[1],
|
||||
offset=offset,
|
||||
dtype=attention_scores.dtype,
|
||||
)
|
||||
if mask is not None:
|
||||
alibi_mask = alibi_mask + mask
|
||||
return attention_scores + alibi_mask
|
||||
|
Reference in New Issue
Block a user