mirror of https://github.com/ml-explore/mlx.git
	fix transformer (#1327)
@@ -72,6 +72,8 @@ from mlx.nn.layers.recurrent import GRU, LSTM, RNN
 from mlx.nn.layers.transformer import (
     MultiHeadAttention,
     Transformer,
+    TransformerDecoder,
+    TransformerDecoderLayer,
     TransformerEncoder,
     TransformerEncoderLayer,
 )
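The first hunk widens the package's exports: TransformerDecoder and TransformerDecoderLayer join the import list (which is why the hunk grows from 6 lines to 8), making the decoder classes reachable from the package namespace alongside the encoder ones. A quick smoke test, assuming the usual mlx.nn re-export chain:

    # Hypothetical smoke test: with the new exports in place, the decoder
    # classes can be imported next to the encoder classes.
    from mlx.nn import TransformerDecoder, TransformerDecoderLayer

    print(TransformerDecoder, TransformerDecoderLayer)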
@@ -147,9 +147,9 @@ class TransformerEncoderLayer(Module):
         else:
             y = self.attention(x, x, x, mask)
             y = self.dropout1(y)
-            y = self.ln1(x + y)
+            x = self.ln1(x + y)

-            y = self.linear1(y)
+            y = self.linear1(x)
             y = self.activation(y)
             y = self.dropout2(y)
             y = self.linear2(y)
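The second hunk is the actual fix, in the post-norm (norm_first=False) branch of TransformerEncoderLayer. Previously the output of ln1 was bound to y, so while the feed-forward block consumed the normalized value, the variable x still held the raw layer input; the later residual connection (presumably ln2(x + y), just past the end of this hunk) therefore added the original input and skipped the attention sublayer entirely. Rebinding x = self.ln1(x + y) and feeding x into linear1 makes both the MLP input and the downstream residual use the normalized post-attention activations. Below is a minimal self-contained sketch of the corrected branch, written against the public mlx.nn API; the ln2 residual at the end is an assumption based on the standard post-norm layout, not something shown in the hunk:

    import mlx.core as mx
    import mlx.nn as nn


    class PostNormEncoderLayer(nn.Module):
        """Sketch of the corrected post-norm path; names mirror the hunk."""

        def __init__(self, dims: int, num_heads: int, mlp_dims: int, dropout: float = 0.0):
            super().__init__()
            self.attention = nn.MultiHeadAttention(dims, num_heads)
            self.ln1 = nn.LayerNorm(dims)
            self.ln2 = nn.LayerNorm(dims)  # assumed second norm, standard post-norm layout
            self.linear1 = nn.Linear(dims, mlp_dims)
            self.linear2 = nn.Linear(mlp_dims, dims)
            self.dropout1 = nn.Dropout(dropout)
            self.dropout2 = nn.Dropout(dropout)
            self.activation = nn.relu

        def __call__(self, x, mask=None):
            y = self.attention(x, x, x, mask)
            y = self.dropout1(y)
            # The fix: rebind x so the second residual below adds the
            # normalized post-attention value, not the raw layer input.
            x = self.ln1(x + y)

            y = self.linear1(x)
            y = self.activation(y)
            y = self.dropout2(y)
            y = self.linear2(y)
            return self.ln2(x + y)


    # Tiny usage check on a (batch, sequence, dims) input.
    layer = PostNormEncoderLayer(dims=64, num_heads=4, mlp_dims=256)
    out = layer(mx.random.normal((2, 16, 64)))
    print(out.shape)  # (2, 16, 64)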
Awni Hannun