From e1072b5300ada6ca9c2d9f26ac9148d854df9d09 Mon Sep 17 00:00:00 2001
From: Chime Ogbuji
Date: Mon, 23 Dec 2024 12:02:37 -0500
Subject: [PATCH] Add HF overrides for methods needed by added options

---
 llms/mlx_lm/evaluate.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llms/mlx_lm/evaluate.py b/llms/mlx_lm/evaluate.py
index 850055f6..f78c3998 100644
--- a/llms/mlx_lm/evaluate.py
+++ b/llms/mlx_lm/evaluate.py
@@ -83,6 +83,9 @@ class MLXLM(LM):
         self._model, self._tokenizer = load(path_or_hf_repo)
         self._max_tokens = max_tokens or self._tokenizer.model_max_length
 
+        # Needed by HF implementation methods (tokenizer_name, apply_chat_template, and tok_encode)
+        self.tokenizer = self._tokenizer
+
     def _score_fn(self, inputs, tokenize=True, step_size=32):
         if tokenize:
             inputs = self._tokenizer.encode(inputs)
@@ -221,6 +224,10 @@ class MLXLM(LM):
         )
         return [(r[0], r[1] == r[2]) for r in results]
 
+    tokenizer_name = lm_eval.models.huggingface.HFLM.tokenizer_name
+    apply_chat_template = lm_eval.models.huggingface.HFLM.apply_chat_template
+    tok_encode = lm_eval.models.huggingface.HFLM.tok_encode
+
     def loglikelihood_rolling(self, requests) -> list[float]:
         """Compute full log-likelihood of a string, with no truncation, for perplexity
         computation - We will use the full max context length of the model.