From 8b6beea3be59ccda9d876934b6ae32abe9902dc0 Mon Sep 17 00:00:00 2001 From: Goekdeniz-Guelmez Date: Tue, 4 Mar 2025 21:26:21 +0100 Subject: [PATCH] faster generation --- llms/mlx_lm/models/olmoe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llms/mlx_lm/models/olmoe.py b/llms/mlx_lm/models/olmoe.py index f0ce097a..73d9b596 100644 --- a/llms/mlx_lm/models/olmoe.py +++ b/llms/mlx_lm/models/olmoe.py @@ -124,9 +124,8 @@ class OlmoeSparseMoeBlock(nn.Module): final_hidden_states = mx.zeros_like(x) for expert_idx in range(self.num_experts): expert_weights = routing_weights[:, expert_idx:expert_idx+1] - if mx.max(expert_weights) > 1e-5: - expert_output = self.experts[expert_idx](x) - final_hidden_states += expert_output * expert_weights + expert_output = self.experts[expert_idx](x) + final_hidden_states += expert_output * expert_weights return final_hidden_states.reshape(B, L, D)