diff --git a/llms/mlx_lm/models/cohere2.py b/llms/mlx_lm/models/cohere2.py index d489d2b9..19bfa6b6 100644 --- a/llms/mlx_lm/models/cohere2.py +++ b/llms/mlx_lm/models/cohere2.py @@ -158,7 +158,7 @@ class CohereModel(nn.Module): if cache is None: cache = [None] * len(self.layers) - + if mask is None: j = self.args.sliding_window_pattern mask = create_attention_mask(h, cache[j - 1 : j])