From d11d77e58124825229d51157bddd79bf6aadfd95 Mon Sep 17 00:00:00 2001
From: Zach Schillaci
Date: Thu, 7 Dec 2023 22:32:09 +0100
Subject: [PATCH] Spelling fixes in transformer.py (#59)

---
 python/mlx/nn/layers/transformer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/mlx/nn/layers/transformer.py b/python/mlx/nn/layers/transformer.py
index 2c586cd3e..68d9303ac 100644
--- a/python/mlx/nn/layers/transformer.py
+++ b/python/mlx/nn/layers/transformer.py
@@ -16,7 +16,7 @@ class MultiHeadAttention(Module):
     new values by aggregating information from the input values according to
     the similarities of the input queries and keys.

-    All inputs as well as the output are lineary projected without biases.
+    All inputs as well as the output are linearly projected without biases.

     MultiHeadAttention also expects an additive attention mask that should be
     broadcastable with (batch, num_heads, # queries, # keys). The mask should
@@ -48,7 +48,7 @@ class MultiHeadAttention(Module):

         if (dims % num_heads) != 0:
             raise ValueError(
-                f"The input feature dimensions should be divisble by the number of heads ({dims} % {num_heads}) != 0"
+                f"The input feature dimensions should be divisible by the number of heads ({dims} % {num_heads}) != 0"
             )

         query_input_dims = query_input_dims or dims
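
Not part of the upstream patch: a minimal usage sketch of the MultiHeadAttention layer whose docstring and error message the hunks above touch, assuming the mlx.core / mlx.nn Python API at this commit; the dims, num_heads, and sequence-length values are illustrative only.

    import mlx.core as mx
    import mlx.nn as nn

    # dims must be divisible by num_heads, otherwise __init__ raises the
    # ValueError whose message is corrected in the second hunk above
    dims, num_heads = 64, 8
    attn = nn.MultiHeadAttention(dims, num_heads)

    # queries, keys, and values are linearly projected without biases inside the layer
    x = mx.random.normal((2, 10, dims))  # (batch, sequence, dims)

    # additive mask, broadcastable with (batch, num_heads, # queries, # keys)
    mask = nn.MultiHeadAttention.create_additive_causal_mask(10)

    out = attn(x, x, x, mask)  # self-attention; output has shape (2, 10, dims)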