From d11d77e58124825229d51157bddd79bf6aadfd95 Mon Sep 17 00:00:00 2001
From: Zach Schillaci
Date: Thu, 7 Dec 2023 22:32:09 +0100
Subject: [PATCH] Spelling fixes in transformer.py (#59)

---
 python/mlx/nn/layers/transformer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/mlx/nn/layers/transformer.py b/python/mlx/nn/layers/transformer.py
index 2c586cd3e..68d9303ac 100644
--- a/python/mlx/nn/layers/transformer.py
+++ b/python/mlx/nn/layers/transformer.py
@@ -16,7 +16,7 @@ class MultiHeadAttention(Module):
     new values by aggregating information from the input values according to
     the similarities of the input queries and keys.

-    All inputs as well as the output are lineary projected without biases.
+    All inputs as well as the output are linearly projected without biases.

     MultiHeadAttention also expects an additive attention mask that should be
     broadcastable with (batch, num_heads, # queries, # keys). The mask should
@@ -48,7 +48,7 @@ class MultiHeadAttention(Module):

         if (dims % num_heads) != 0:
             raise ValueError(
-                f"The input feature dimensions should be divisble by the number of heads ({dims} % {num_heads}) != 0"
+                f"The input feature dimensions should be divisible by the number of heads ({dims} % {num_heads}) != 0"
             )

         query_input_dims = query_input_dims or dims
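
Not part of the upstream patch: a minimal usage sketch of the MultiHeadAttention layer whose docstring and error message the hunks above touch, assuming the mlx.core / mlx.nn Python API at this commit; the dims, num_heads, and sequence-length values are illustrative only.

    import mlx.core as mx
    import mlx.nn as nn

    # dims must be divisible by num_heads, otherwise __init__ raises the
    # ValueError whose message is corrected in the second hunk above
    dims, num_heads = 64, 8
    attn = nn.MultiHeadAttention(dims, num_heads)

    # queries, keys, and values are linearly projected without biases inside the layer
    x = mx.random.normal((2, 10, dims))  # (batch, sequence, dims)

    # additive mask, broadcastable with (batch, num_heads, # queries, # keys)
    mask = nn.MultiHeadAttention.create_additive_causal_mask(10)

    out = attn(x, x, x, mask)  # self-attention; output has shape (2, 10, dims)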