From 5d8b36ce7c208d1e722216da0003c6c48921e406 Mon Sep 17 00:00:00 2001 From: Prince Canuma Date: Sat, 14 Dec 2024 16:22:00 +0100 Subject: [PATCH] revert act_fn to silu --- llms/mlx_lm/models/cohere2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llms/mlx_lm/models/cohere2.py b/llms/mlx_lm/models/cohere2.py index ae19f4d8..b4068679 100644 --- a/llms/mlx_lm/models/cohere2.py +++ b/llms/mlx_lm/models/cohere2.py @@ -115,7 +115,7 @@ class MLP(nn.Module): self.down_proj = nn.Linear(hidden_dim, dim, bias=False) def __call__(self, x): - return self.down_proj(nn.gelu(self.gate_proj(x)) * self.up_proj(x)) + return self.down_proj(nn.silu(self.gate_proj(x)) * self.up_proj(x)) class TransformerBlock(nn.Module):