Spelling fixes in transformer.py (#59)

Zach Schillaci 2023-12-07 22:32:09 +01:00 committed by GitHub
parent bf410cb85e
commit d11d77e581


@@ -16,7 +16,7 @@ class MultiHeadAttention(Module):
     new values by aggregating information from the input values according to
     the similarities of the input queries and keys.
 
-    All inputs as well as the output are lineary projected without biases.
+    All inputs as well as the output are linearly projected without biases.
 
     MultiHeadAttention also expects an additive attention mask that should be
     broadcastable with (batch, num_heads, # queries, # keys). The mask should
@@ -48,7 +48,7 @@ class MultiHeadAttention(Module):
         if (dims % num_heads) != 0:
             raise ValueError(
-                f"The input feature dimensions should be divisble by the number of heads ({dims} % {num_heads}) != 0"
+                f"The input feature dimensions should be divisible by the number of heads ({dims} % {num_heads}) != 0"
             )
 
         query_input_dims = query_input_dims or dims
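
For context, the class touched by this commit is MLX's MultiHeadAttention. Below is a minimal usage sketch of the behavior the docstring describes; it assumes the standard mlx.core / mlx.nn imports and the create_additive_causal_mask helper, and the shapes are purely illustrative:

import mlx.core as mx
import mlx.nn as nn

dims, num_heads = 64, 8  # dims must be divisible by num_heads, per the check above
attn = nn.MultiHeadAttention(dims, num_heads)

# Queries, keys, and values share a (batch, sequence, dims) shape here (self-attention).
x = mx.random.normal((2, 10, dims))

# Additive causal mask, broadcastable with (batch, num_heads, # queries, # keys).
mask = nn.MultiHeadAttention.create_additive_causal_mask(10)

out = attn(x, x, x, mask=mask)  # output shape: (2, 10, dims)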