From 8b6beea3be59ccda9d876934b6ae32abe9902dc0 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez <Gulmezdeniz1999@gmail.com>
Date: Tue, 4 Mar 2025 21:26:21 +0100
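Subject: [PATCH] olmoe: drop per-expert weight check for faster generation

Remove the `mx.max(expert_weights) > 1e-5` guard from the expert loop in
OlmoeSparseMoeBlock. Every expert is now applied to the input and its
output, scaled by that expert's routing weights, is accumulated
unconditionally, so the loop no longer has a data-dependent branch per
expert.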

---
 llms/mlx_lm/models/olmoe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llms/mlx_lm/models/olmoe.py b/llms/mlx_lm/models/olmoe.py
index f0ce097a..73d9b596 100644
--- a/llms/mlx_lm/models/olmoe.py
+++ b/llms/mlx_lm/models/olmoe.py
@@ -124,9 +124,8 @@ class OlmoeSparseMoeBlock(nn.Module):
         final_hidden_states = mx.zeros_like(x)
         for expert_idx in range(self.num_experts):
             expert_weights = routing_weights[:, expert_idx:expert_idx+1]
-            if mx.max(expert_weights) > 1e-5:
-                expert_output = self.experts[expert_idx](x)
-                final_hidden_states += expert_output * expert_weights
+            expert_output = self.experts[expert_idx](x)
+            final_hidden_states += expert_output * expert_weights
         return final_hidden_states.reshape(B, L, D)